diff --git a/.gitignore b/.gitignore index 0a677773713..8ba285c898a 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ work/ # Claude planning files plans/ +.xqts-runner/ diff --git a/PR-DESCRIPTION.md b/PR-DESCRIPTION.md new file mode 100644 index 00000000000..6db63906630 --- /dev/null +++ b/PR-DESCRIPTION.md @@ -0,0 +1,207 @@ +## Summary + +Implements XQuery 4.0 parser and runtime support for eXist-db, covering the majority of the QT4CG specification draft syntax, 50+ new standard functions, and enhanced existing functions. This brings eXist-db in line with the evolving XQuery 4.0 standard alongside BaseX and Saxon. + +This PR is part of the [XQuery 4.0 master plan](https://github.com/eXist-db/exist/issues/XXXX) and covers: +- **Parser**: All major XQ4 syntax additions via ANTLR 2 grammar extensions +- **Functions**: 50+ new `fn:` functions and enhancements to existing functions +- **Map/Array modules**: Ordered maps, 6 new map functions, 4 new array functions +- **Error codes**: Spec-compliant error code alignment across type checking +- **Parameter names**: W3C catalog alignment for keyword argument support + +## What Changed + +### Grammar changes (XQuery.g + XQueryTree.g) + +| Feature | Spec Reference | Status | +|---------|---------------|--------| +| Focus functions: `fn { expr }` | PR2200 | Complete | +| Keyword arguments: `name := expr` | PR197 | Complete | +| Default parameter values: `$param := default` | PR197 | Complete | +| String templates: `` `Hello {$name}` `` | PR254 | Complete | +| Pipeline operator: `expr => func` | PR510 | Complete | +| Mapping arrow: `expr =!> func` | PR510 | Complete | +| `for member` clause | PR1172 | Complete | +| `otherwise` expression | PR795 | Complete | +| Braced if: `if (cond) { expr }` | — | Complete | +| `while` clause in FLWOR | — | Complete | +| `try`/`catch`/`finally` | — | Complete | +| Ternary conditional: `?? !!` | — | Complete | +| QName literals: `#name` | — | Complete | +| Hex/binary integer literals | — | Complete | +| Numeric underscores: `1_000_000` | — | Complete | +| Array/map filter: `?[predicate]` | — | Complete | +| Choice/union item types | — | Complete | +| Enumeration types: `enum("a","b")` | — | Complete | +| Method call operator: `=?>` | — | Complete | +| Let destructuring | — | Complete | +| `fn(...)` type shorthand | — | Complete | +| `declare context value` | — | Complete | +| `xquery version "4.0"` | — | Complete | +| Braced switch/typeswitch | — | Complete | +| Unicode `×` multiplication sign | — | Complete | +| `reservedKeywords` sub-rule refactoring | — | Complete | + +### Expression classes (30 files) + +New expression classes for XQ4 runtime semantics: + +| Class | Purpose | +|-------|---------| +| `FocusFunction` | `fn { expr }` with implicit context item binding | +| `KeywordArgumentExpression` | `name := expr` argument passing | +| `MappingArrowOperator` | `=!>` with sequence mapping semantics | +| `MethodCallOperator` | `=?>` method dispatch | +| `PipelineExpression` | `=>` left-to-right function chaining | +| `OtherwiseExpression` | Fallback when left side is empty | +| `WhileClause` | FLWOR `while (condition)` iteration | +| `ForMemberExpr` / `ForKeyValueExpr` | Array/map iteration | +| `LetDestructureExpr` | `let ($a, $b) := sequence` | +| `FilterExprAM` | `?[predicate]` array/map filtering | +| `ChoiceCastExpression` / `ChoiceCastableExpression` | Union type casting | +| `EnumCastExpression` | `enum("a","b")` validation | +| `FunctionParameterFunctionSequenceType` | HOF parameter type with arity checking | + +Modified classes include `Function` (keyword arg resolution), `FunctionSignature` (default params), `UserDefinedFunction` (default param binding), `TryCatchExpression` (finally clause), `SwitchExpression` (XQ4 version gating), `StringConstructor` (atomization fixes), and `XQueryContext` (version 4.0 recognition). + +### XQ4 functions (50+ new, 18 enhanced) + +**New function implementations:** + +| Category | Functions | +|----------|----------| +| Sequence | `fn:characters`, `fn:foot`, `fn:trunk`, `fn:items-at`, `fn:slice`, `fn:replicate`, `fn:insert-separator` | +| Comparison | `fn:all-equal`, `fn:all-different`, `fn:duplicate-values`, `fn:atomic-equal`, `fn:highest`, `fn:lowest` | +| Higher-order | `fn:every`, `fn:some`, `fn:partition`, `fn:scan-left`, `fn:scan-right`, `fn:op`, `fn:partial-apply` | +| Subsequence | `fn:contains-subsequence`, `fn:starts-with-subsequence`, `fn:ends-with-subsequence`, `fn:subsequence-where` | +| URI/String | `fn:parse-uri`, `fn:build-uri`, `fn:decode-from-uri`, `fn:char`, `fn:characters` | +| Type/Reflection | `fn:type-of`, `fn:atomic-type-annotation`, `fn:node-type-annotation`, `fn:function-annotations`, `fn:function-identity`, `fn:is-NaN`, `fn:identity`, `fn:void` | +| Date/Time | `fn:civil-timezone`, `fn:seconds`, `fn:unix-dateTime` | +| Hash | `fn:hash` (MD5, SHA-1, SHA-256, SHA-384, SHA-512, BLAKE3) | +| CSV | `fn:csv`, `fn:parse-csv`, `fn:csv-to-arrays` | +| Names | `fn:parse-QName`, `fn:expanded-QName`, `fn:parse-integer` | +| Navigation | `fn:transitive-closure`, `fn:element-to-map`, `fn:distinct-ordered-nodes`, `fn:siblings`, `fn:in-scope-namespaces` | +| Misc | `fn:sort-by`, `fn:divide-decimals`, `fn:message`, `fn:deep-equal` (options map) | + +**Enhanced existing functions:** + +| Function | Enhancement | +|----------|-------------| +| `fn:compare` | XQ4 `anyAtomicType`, numeric total order, duration/datetime ordering | +| `fn:min`/`fn:max` | Comparison function parameter | +| `fn:deep-equal` | Options map (debug, flags, collation) | +| `fn:matches`/`fn:tokenize` | XQ4 regex flags (`!` for XPath, unnamed capture groups) | +| `fn:replace` | `c` flag, empty match handling, function replacement parameter | +| `fn:round` | 3-argument `$mode` overload (half-up, half-down, etc.) | +| Collations | Fixed supplementary codepoint comparison; ASCII case-insensitive collator | + +### Map module enhancements (6 files) + +- **Ordered maps**: Maps preserve insertion order (backed by `LinkedHashMap`) +- **New functions**: `map:keys-where`, `map:filter`, `map:build`, `map:pair`, `map:of-pairs`, `map:values-of`, `map:index` +- **Cross-type numeric key equality**: `map { 1: "a" }?1.0` works correctly + +### Array module enhancements + +- `array:index-where`, `array:slice`, `array:sort-by`, `array:sort-with` + +### Error code alignment (26 files) + +Aligned error codes with the W3C specification across type casting, cardinality checks, and treat-as expressions: + +| Component | Change | Impact | +|-----------|--------|--------| +| `convertTo()` in 20 atomic types | `FORG0001` → `XPTY0004` for type-incompatible casts | +510 tests | +| `DoubleValue` | NaN/INF → integer/decimal: `FOCA0002` | +48 tests | +| `DynamicCardinalityCheck` | Generic `ERROR` → `XPTY0004` (or `XPDY0050` for treat-as) | +5 tests | +| `DynamicTypeCheck` | `FOCH0002` → `XPTY0004` (overridable for treat-as) | +1 test | +| `TreatAsExpression` | Passes `XPDY0050` to type/cardinality checks | +17 tests | + +### Parameter name alignment (59 files) + +Renamed function parameter names across 59 `fn:` module files to match the W3C XQuery 4.0 Functions and Operators catalog. This enables keyword argument support (`name := value`) with the standard parameter names. Primary renames: `$arg` → `$value`, `$arg` → `$input`, etc. + +### Tests + +- **`fnXQuery40.xql`**: Comprehensive XQSuite test file covering all XQ4 features (2491 lines) +- Updated `fnHigherOrderFunctions.xql`, `replace.xqm`, `fnLanguage.xqm`, `InspectModuleTest.java` +- New `deep-equal-options-test.xq` for XQ4 deep-equal options map + +## Spec References + +- [QT4CG XQuery 4.0 Draft](https://qt4cg.org/specifications/xquery-40/) +- [QT4CG XPath/XQuery Functions 4.0](https://qt4cg.org/specifications/xpath-functions-40/) +- Key proposals: PR197 (keyword args), PR254 (string templates), PR510 (pipeline/mapping arrow), PR795 (otherwise), PR1172 (for member), PR2200 (fn keyword/focus functions) + +## XQTS Results + +QT4 XQTS test sets, run against the consolidated branch (2026-03-14): + +| Test Set | Tests | Passed | Failed | Errors | Pass Rate | +|----------|-------|--------|--------|--------|-----------| +| misc-BuiltInKeywords | 297 | 215 | 79 | 3 | 72.4% | +| prod-ArrowExpr | 70 | 67 | 3 | 0 | 95.7% | +| prod-CastExpr | 2803 | 2613 | 187 | 3 | 93.2% | +| prod-CountClause | 13 | 12 | 1 | 0 | 92.3% | +| prod-DynamicFunctionCall | 88 | 33 | 54 | 1 | 37.5% | +| prod-FLWORExpr | 21 | 21 | 0 | 0 | 100.0% | +| prod-FunctionDecl | 228 | 175 | 53 | 0 | 76.8% | +| prod-GroupByClause | 40 | 36 | 2 | 2 | 90.0% | +| prod-IfExpr | 43 | 42 | 1 | 0 | 97.7% | +| prod-InlineFunctionExpr | 46 | 37 | 7 | 2 | 80.4% | +| prod-InstanceofExpr | 319 | 310 | 9 | 0 | 97.2% | +| prod-Lookup | 131 | 116 | 13 | 2 | 88.5% | +| prod-NamedFunctionRef | 564 | 520 | 42 | 2 | 92.2% | +| prod-OrderByClause | 206 | 204 | 1 | 1 | 99.0% | +| prod-QuantifiedExpr | 215 | 204 | 11 | 0 | 94.9% | +| prod-StringTemplate | 53 | 52 | 1 | 0 | 98.1% | +| prod-SwitchExpr | 38 | 38 | 0 | 0 | 100.0% | +| prod-TreatExpr | 73 | 72 | 1 | 0 | 98.6% | +| prod-TryCatchExpr | 193 | 163 | 30 | 0 | 84.5% | +| prod-TypeswitchExpr | 74 | 72 | 2 | 0 | 97.3% | +| prod-UnaryLookup | 37 | 31 | 4 | 2 | 83.8% | +| prod-WhereClause | 85 | 78 | 7 | 0 | 91.8% | +| prod-WindowClause | 158 | 125 | 33 | 0 | 79.1% | +| **Total** | **5795** | **5236** | **541** | **18** | **90.4%** | + +**Test sets at 100%:** prod-FLWORExpr, prod-SwitchExpr + +**XQSuite:** 1316 tests, 0 failures, 9 skipped + +### Failure analysis + +The remaining failures are primarily: + +| Category | Count | Notes | +|----------|-------|-------| +| Record types / type infrastructure | ~120 | Requires XQ4 record type system (not yet implemented) | +| Unimplemented functions | ~80 | Functions not yet available in eXist-db | +| Error code mismatches | ~80 | Generic `ERROR` vs specific codes in validation routines | +| XQ4 no-namespace functions | ~40 | PR2200 allows overriding `fn:` namespace (architectural change) | +| Parser type syntax | ~30 | Record/union types in function signatures | +| Pre-existing issues | ~20 | Failures also present on develop | +| Window clause | ~30 | XQ4 window clause extensions | +| Other | ~30 | Various edge cases | + +## Limitations + +The following XQuery 4.0 features are **not** implemented in this PR: + +- **Record types** (`record(name as xs:string, age as xs:integer)`) — requires new type infrastructure +- **Union types in type declarations** — parser accepts but runtime support is limited +- **JNode / JSON node types** — requires new data model layer +- **`declare context value`** — parsed as synonym but not fully enforced +- **Method calls (`=?>`)** — parsed but limited to simple dispatch +- **No-namespace function overriding** (PR2200) — `fn:` namespace functions cannot yet be overridden by unprefixed declarations +- **Version gating** — XQ4 features are available regardless of `xquery version` declaration; no XQ3.1-only mode +- **XML Schema revalidation** — not applicable to eXist-db + +## Test Plan + +- [x] XQSuite: 1316 tests, 0 failures +- [x] QT4 XQTS: 5236/5795 (90.4%) across 23 parser-related test sets +- [ ] Full `mvn test` on CI +- [ ] XQTS comparison against develop baseline +- [ ] Review by @duncdrum + +Co-Authored-By: Claude Opus 4.6 (1M context) diff --git a/exist-core/pom.xml b/exist-core/pom.xml index 98a1cdd5733..2843f18452c 100644 --- a/exist-core/pom.xml +++ b/exist-core/pom.xml @@ -390,6 +390,11 @@ Saxon-HE + + de.bottlecaps + markup-blitz + + org.exist-db exist-saxon-regex @@ -1191,6 +1196,7 @@ The BaseX Team. The original license statement is also included below.]]> + 600 ${skipUnitTests} @{jacocoArgLine} -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true @@ -1200,6 +1206,7 @@ The BaseX Team. The original license statement is also included below.]]>${project.build.testOutputDirectory}/log4j2.xml + 180 + + + org.exist.storage.lock.DeadlockIT + org.exist.xmldb.RemoveCollectionIT + @{jacocoArgLine} -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true ${project.basedir}/../exist-jetty-config/target/classes/org/exist/jetty diff --git a/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g b/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g index d852d700444..3737b2ba1fe 100644 --- a/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g +++ b/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g @@ -83,6 +83,7 @@ options { protected Deque> globalStack = new ArrayDeque<>(); protected Deque elementStack = new ArrayDeque<>(); protected XQueryLexer lexer; + protected boolean xq4Enabled = false; public XQueryParser(XQueryLexer lexer) { this((TokenStream)lexer); @@ -90,6 +91,8 @@ options { setASTNodeClass("org.exist.xquery.parser.XQueryAST"); } + public boolean isXQ4() { return xq4Enabled; } + public boolean foundErrors() { return foundError; } @@ -192,6 +195,28 @@ imaginaryTokenDefinitions PREVIOUS_ITEM NEXT_ITEM WINDOW_VARS + FOCUS_FUNCTION + KEYWORD_ARG + FOR_MEMBER + STRING_TEMPLATE + FOR_KEY + FOR_VALUE + FOR_KEY_VALUE + VALUE_VAR + SWITCH_BOOLEAN + MAPPING_ARROW + FILTER_AM + QNAME_LITERAL + PARAM_DEFAULT + CHOICE_TYPE + ENUM_TYPE + TERNARY + SEQ_DESTRUCTURE + ARRAY_DESTRUCTURE + MAP_DESTRUCTURE + DESTRUCTURE_VAR_TYPE + RECORD_TEST + RECORD_FIELD ; // === XPointer === @@ -272,7 +297,7 @@ prolog throws XPathException ( "declare" "variable" ) => varDeclUp { inSetters = false; } | - ( "declare" "context" "item" ) + ( "declare" "context" ("item" | "value") ) => contextItemDeclUp { inSetters = false; } | ( "declare" MOD ) @@ -292,7 +317,12 @@ importDecl throws XPathException versionDecl throws XPathException : "xquery" "version" v:STRING_LITERAL ( "encoding"! enc:STRING_LITERAL )? - { #versionDecl = #(#[VERSION_DECL, v.getText()], enc); } + { + #versionDecl = #(#[VERSION_DECL, v.getText()], enc); + if ("4.0".equals(v.getText())) { + xq4Enabled = true; + } + } ; setter @@ -441,7 +471,7 @@ contextItemDeclUp! throws XPathException contextItemDecl [XQueryAST decl] throws XPathException : - "context"! "item"! ( typeDeclaration )? + "context"! ( "item"! | "value"! ) ( typeDeclaration )? ( COLON! EQ! e1:expr | @@ -464,10 +494,22 @@ annotation String name= null; } : - MOD! name=eqName! (LPAREN! literal (COMMA! literal)* RPAREN!)? + MOD! name=eqName! (LPAREN! annotationLiteral (COMMA! annotationLiteral)* RPAREN!)? { #annotation= #(#[ANNOT_DECL, name], #annotation); } ; +// XQ4: annotation parameters support literals, true(), false(), and negated numeric literals +// Note: true()/false() must be matched via NCNAME + semantic predicate, NOT as "true"/"false" keywords. +// Using quoted keyword syntax would register them in testLiterals, breaking true()/false() function +// calls throughout the grammar (ANTLR 2 converts all NCNAMEs matching keywords to LITERAL_xxx tokens). +annotationLiteral +: + literal + | ( { LT(1).getText().equals("true") || LT(1).getText().equals("false") }? b:NCNAME LPAREN! RPAREN! + { #annotationLiteral = #[STRING_LITERAL, #b.getText()]; #b = null; } ) + | MINUS! n:numericLiteral { #n.setText("-" + #n.getText()); #annotationLiteral = #n; } + ; + eqName returns [String name] { name= null; } : @@ -550,7 +592,10 @@ param throws XPathException { String varName= null; } : DOLLAR! varName=eqName ( t:typeDeclaration )? - { #param= #(#[VARIABLE_BINDING, varName], #t); } + ( ( { xq4Enabled }? COLON EQ ) => COLON! EQ! pd:exprSingle! + { #pd = #(#[PARAM_DEFAULT, "param-default"], #pd); } + )? + { #param= #(#[VARIABLE_BINDING, varName], #t, #pd); } ; uriList throws XPathException @@ -588,10 +633,16 @@ itemType throws XPathException | ( "function" LPAREN ) => functionTest | + ( "fn" LPAREN ) => fnShorthandFunctionTest + | ( "map" LPAREN ) => mapType | ( "array" LPAREN ) => arrayType | + ( "record" LPAREN ) => recordType + | + ( "enum" LPAREN ) => enumType + | ( LPAREN ) => parenthesizedItemType | ( . LPAREN ) => kindTest @@ -600,13 +651,51 @@ itemType throws XPathException ; parenthesizedItemType throws XPathException +{ int count = 0; } +: + LPAREN! itemType { count++; } ( UNION! itemType { count++; } )* RPAREN! + { + if (count > 1) { + #parenthesizedItemType = #(#[CHOICE_TYPE, "choice-type"], #parenthesizedItemType); + } + } + ; + +enumType throws XPathException +{ List enumValues = new ArrayList(); } : - LPAREN! itemType RPAREN! + e:"enum"! LPAREN! + s1:STRING_LITERAL! { enumValues.add(s1.getText()); } + ( COMMA! s2:STRING_LITERAL! { enumValues.add(s2.getText()); } )* + RPAREN! + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < enumValues.size(); i++) { + if (i > 0) sb.append(","); + sb.append(enumValues.get(i)); + } + #enumType = #(#[ENUM_TYPE, sb.toString()]); + #enumType.copyLexInfo(#e); + } ; singleType throws XPathException +{ int count = 0; } : - atomicType ( QUESTION )? + ( + ( "enum" LPAREN ) => enumType ( QUESTION )? + | + ( LPAREN ) => + LPAREN! atomicType { count++; } ( UNION! atomicType { count++; } )* RPAREN! + { + if (count > 1) { + #singleType = #(#[CHOICE_TYPE, "choice-type"], #singleType); + } + } + ( QUESTION )? + | + atomicType ( QUESTION )? + ) ; atomicType throws XPathException @@ -634,10 +723,38 @@ anyFunctionTest throws XPathException typedFunctionTest throws XPathException : - "function"! LPAREN! (sequenceType (COMMA! sequenceType)*)? RPAREN! "as" sequenceType + "function"! LPAREN! (fnShorthandParam (COMMA! fnShorthandParam)*)? RPAREN! "as" sequenceType { #typedFunctionTest = #(#[FUNCTION_TEST, "anyFunction"], #typedFunctionTest); } ; +// XQ4: fn(...) as shorthand for function(...) in type positions +fnShorthandFunctionTest throws XPathException +: + ( "fn" LPAREN STAR RPAREN) => fnShorthandAnyFunctionTest + | + fnShorthandTypedFunctionTest + ; + +fnShorthandAnyFunctionTest throws XPathException +: + "fn"! LPAREN! s2:STAR RPAREN! + { #fnShorthandAnyFunctionTest = #(#[FUNCTION_TEST, "anyFunction"], #s2); } + ; + +fnShorthandTypedFunctionTest throws XPathException +: + "fn"! LPAREN! (fnShorthandParam (COMMA! fnShorthandParam)*)? RPAREN! "as" sequenceType + { #fnShorthandTypedFunctionTest = #(#[FUNCTION_TEST, "anyFunction"], #fnShorthandTypedFunctionTest); } + ; + +// XQ4: fn() type parameters can optionally have names: fn($name as type, ...) +fnShorthandParam throws XPathException +: + ( DOLLAR ) => DOLLAR! eqName! "as"! sequenceType + | + sequenceType + ; + mapType throws XPathException : ( "map" LPAREN STAR ) => anyMapTypeTest @@ -686,6 +803,50 @@ arrayTypeTest throws XPathException } ; +recordType throws XPathException +: + ( "record" LPAREN STAR ) => anyRecordTypeTest + | + ( "record" LPAREN RPAREN ) => emptyRecordTypeTest + | + recordTypeTest + ; + +anyRecordTypeTest throws XPathException +: + m:"record"! LPAREN! s:STAR RPAREN! + { + #anyRecordTypeTest = #(#[RECORD_TEST, "record"], #s); + #anyRecordTypeTest.copyLexInfo(#m); + } + ; + +emptyRecordTypeTest throws XPathException +: + m:"record"! LPAREN! RPAREN! + { + #emptyRecordTypeTest = #(#[RECORD_TEST, "record"]); + #emptyRecordTypeTest.copyLexInfo(#m); + } + ; + +recordTypeTest throws XPathException +: + m:"record"! LPAREN! recordFieldDecl ( COMMA! ( STAR | recordFieldDecl ) )* RPAREN! + { + #recordTypeTest = #(#[RECORD_TEST, "record"], #recordTypeTest); + } + ; + +recordFieldDecl throws XPathException +{ String fieldName = null; } +: + fieldName=ncnameOrKeyword! ( QUESTION )? ( "as"! sequenceType )? + { + #recordFieldDecl = #(#[RECORD_FIELD, fieldName], #recordFieldDecl); + } + ; + // === Expressions === queryBody throws XPathException: expr ; @@ -702,7 +863,7 @@ expr throws XPathException exprSingle throws XPathException : - ( ( "for" | "let" ) ("tumbling" | "sliding" | DOLLAR ) ) => flworExpr + ( ( "for" | "let" ) ("tumbling" | "sliding" | "member" | "key" | "value" | DOLLAR) ) => flworExpr | ( "try" LCURLY ) => tryCatchExpr | ( ( "some" | "every" ) DOLLAR ) => quantifiedExpr | ( "if" LPAREN ) => ifExpr @@ -752,11 +913,14 @@ renameExpr throws XPathException "rename" exprSingle "as"! exprSingle ; -// === try/catch === +// === try/catch/finally === tryCatchExpr throws XPathException : "try"^ LCURLY! tryTargetExpr RCURLY! - (catchClause)+ + ( + (catchClause)+ ( { xq4Enabled }? finallyClause )? + | { xq4Enabled }? finallyClause + ) ; tryTargetExpr throws XPathException @@ -769,6 +933,11 @@ catchClause throws XPathException "catch"^ catchErrorList (catchVars)? LCURLY! expr RCURLY! ; +finallyClause throws XPathException +: + "finally"^ LCURLY! (expr)? RCURLY! + ; + catchErrorList throws XPathException : nameTest (UNION! nameTest)* @@ -809,14 +978,14 @@ flworExpr throws XPathException initialClause throws XPathException : - ( ( "for" DOLLAR ) => forClause + ( ( "for" ( "member" | "key" | "value" | DOLLAR ) ) => forClause | ( "for" ( "tumbling" | "sliding" ) ) => windowClause | letClause ) ; intermediateClause throws XPathException : - ( initialClause | whereClause | groupByClause | orderByClause | countClause ) + ( initialClause | whereClause | whileClause | groupByClause | orderByClause | countClause ) ; whereClause throws XPathException @@ -824,6 +993,11 @@ whereClause throws XPathException "where"^ exprSingle ; +whileClause throws XPathException +: + { xq4Enabled }? "while"^ exprSingle + ; + countClause throws XPathException { String varName; } : @@ -833,7 +1007,77 @@ countClause throws XPathException forClause throws XPathException : - "for"^ inVarBinding ( COMMA! inVarBinding )* + "for"^ forBinding ( COMMA! forBinding )* + ; + +forBinding throws XPathException +: + ( { xq4Enabled }? "member" ) => memberVarBinding + | ( { xq4Enabled }? "key" ) => keyVarBinding + | ( { xq4Enabled }? "value" ) => valueVarBinding + | inVarBinding + ; + +memberVarBinding throws XPathException +{ String varName; } +: + "member"! DOLLAR! varName=v:varName! ( typeDeclaration )? + ( positionalVar )? + "in"! exprSingle + { + #memberVarBinding= #(#[VARIABLE_BINDING, varName], #memberVarBinding); + #memberVarBinding.copyLexInfo(#v); + #memberVarBinding= #(#[FOR_MEMBER, null], #memberVarBinding); + } + ; + +keyVarBinding throws XPathException +{ String varName; } +: + "key"! DOLLAR! varName=v:varName! ( typeDeclaration )? + ( + ( "value" DOLLAR ) => keyValueVarPart + )? + ( positionalVar )? + "in"! exprSingle + { + #keyVarBinding= #(#[VARIABLE_BINDING, varName], #keyVarBinding); + #keyVarBinding.copyLexInfo(#v); + // Check if we have a value variable (keyValueVarPart was matched) + boolean hasValueVar = false; + AST child = #keyVarBinding.getFirstChild(); + while (child != null) { + if (child.getType() == VALUE_VAR) { hasValueVar = true; break; } + child = child.getNextSibling(); + } + if (hasValueVar) { + #keyVarBinding= #(#[FOR_KEY_VALUE, null], #keyVarBinding); + } else { + #keyVarBinding= #(#[FOR_KEY, null], #keyVarBinding); + } + } + ; + +keyValueVarPart throws XPathException +{ String valueVarName; } +: + "value"! DOLLAR! valueVarName=varName! ( typeDeclaration )? + { + #keyValueVarPart = #(#[VALUE_VAR, valueVarName], #keyValueVarPart); + } + ; + +valueVarBinding throws XPathException +{ String varName; } +: + "value"! DOLLAR! varName=v:varName! ( typeDeclaration )? + ( positionalVar )? + "in"! exprSingle + { + #valueVarBinding= #(#[VARIABLE_BINDING, varName], #valueVarBinding); + #valueVarBinding.copyLexInfo(#v); + #valueVarBinding= #(#[FOR_VALUE, null], #valueVarBinding); + } ; letClause throws XPathException @@ -904,6 +1148,16 @@ windowVars throws XPathException letVarBinding throws XPathException { String varName; } : + // XQ4: sequence destructuring - let $($x, $y) := expr + ( DOLLAR LPAREN ) => letDestructureSeq + | + // XQ4: array destructuring - let $[$x, $y] := expr + ( DOLLAR LPPAREN ) => letDestructureArray + | + // XQ4: map destructuring - let ${$x, $y} := expr + ( DOLLAR LCURLY ) => letDestructureMap + | + // Standard let binding DOLLAR! varName=v:varName! ( typeDeclaration )? COLON! EQ! exprSingle { @@ -912,6 +1166,67 @@ letVarBinding throws XPathException } ; +// XQ4: Per-variable type annotations: "x+,y" means $x has a DESTRUCTURE_VAR_TYPE child, $y does not +letDestructureSeq throws XPathException +{ String vn; + StringBuilder sb = new StringBuilder(); } +: + d:DOLLAR! LPAREN! + DOLLAR! vn=varName! { sb.append(vn); } + ( destructureVarType { sb.append("+"); } )? + ( COMMA! DOLLAR! vn=varName! { sb.append(",").append(vn); } + ( destructureVarType { sb.append("+"); } )? )* + RPAREN! ( typeDeclaration )? + COLON! EQ! exprSingle + { + #letDestructureSeq = #(#[SEQ_DESTRUCTURE, sb.toString()], #letDestructureSeq); + #letDestructureSeq.copyLexInfo(#d); + } + ; + +letDestructureArray throws XPathException +{ String vn; + StringBuilder sb = new StringBuilder(); } +: + d:DOLLAR! LPPAREN! + DOLLAR! vn=varName! { sb.append(vn); } + ( destructureVarType { sb.append("+"); } )? + ( COMMA! DOLLAR! vn=varName! { sb.append(",").append(vn); } + ( destructureVarType { sb.append("+"); } )? )* + RPPAREN! ( typeDeclaration )? + COLON! EQ! exprSingle + { + #letDestructureArray = #(#[ARRAY_DESTRUCTURE, sb.toString()], #letDestructureArray); + #letDestructureArray.copyLexInfo(#d); + } + ; + +letDestructureMap throws XPathException +{ String vn; + StringBuilder sb = new StringBuilder(); } +: + d:DOLLAR! LCURLY! + DOLLAR! vn=varName! { sb.append(vn); } + ( destructureVarType { sb.append("+"); } )? + ( COMMA! DOLLAR! vn=varName! { sb.append(",").append(vn); } + ( destructureVarType { sb.append("+"); } )? )* + RCURLY! ( typeDeclaration )? + COLON! EQ! exprSingle + { + #letDestructureMap = #(#[MAP_DESTRUCTURE, sb.toString()], #letDestructureMap); + #letDestructureMap.copyLexInfo(#d); + } + ; + +// Helper: wraps typeDeclaration in DESTRUCTURE_VAR_TYPE imaginary token +destructureVarType throws XPathException +: + td:typeDeclaration + { + #destructureVarType = #(#[DESTRUCTURE_VAR_TYPE, "vartype"], #td); + } + ; + orderByClause throws XPathException : ( "order"! "by"! | "stable"! "order"! "by"! ) orderSpecList @@ -973,9 +1288,26 @@ quantifiedInVarBinding throws XPathException switchExpr throws XPathException : - "switch"^ LPAREN! expr RPAREN! - ( switchCaseClause )+ - "default" "return"! exprSingle + "switch"^ LPAREN! + ( + // XQ4 omitted comparand - boolean mode: switch () { case boolExpr return ... } + ( RPAREN ) => + RPAREN! switchBooleanMarker + | + expr RPAREN! + ) + ( + // XQ4 braced syntax: switch (...) { case ... default ... } + ( LCURLY "case" ) => + LCURLY! ( switchCaseClause )+ "default" "return"! exprSingle RCURLY! + | + ( switchCaseClause )+ "default" "return"! exprSingle + ) + ; + +switchBooleanMarker +: + { #switchBooleanMarker = #(#[SWITCH_BOOLEAN, "switch-boolean"]); } ; switchCaseClause throws XPathException @@ -988,8 +1320,13 @@ typeswitchExpr throws XPathException { String varName; } : "typeswitch"^ LPAREN! expr RPAREN! - ( caseClause )+ - "default" ( defaultVar )? "return"! exprSingle + ( + // XQ4 braced syntax: typeswitch (...) { case ... default ... } + ( LCURLY "case" ) => + LCURLY! ( caseClause )+ "default" ( defaultVar )? "return"! exprSingle RCURLY! + | + ( caseClause )+ "default" ( defaultVar )? "return"! exprSingle + ) ; caseClause throws XPathException @@ -1024,12 +1361,28 @@ defaultVar throws XPathException ; ifExpr throws XPathException +{ + org.exist.xquery.parser.XQueryAST emptyNode = null; +} : - "if"^ LPAREN! expr RPAREN! t:"then"! thenExpr:exprSingle e:"else"! elseExpr:exprSingle - { - #thenExpr.copyLexInfo(#t); - #elseExpr.copyLexInfo(#e); - } + "if"^ LPAREN! expr RPAREN! + ( + // Traditional: if (cond) then expr else expr + ( "then" ) => + t:"then"! thenExpr:exprSingle e:"else"! elseExpr:exprSingle + { + #thenExpr.copyLexInfo(#t); + #elseExpr.copyLexInfo(#e); + } + | + // XQ4 Braced: if (cond) { expr } (no else clause; returns empty sequence if false) + LCURLY! bracedThenExpr:expr RCURLY! + { + // Synthesize empty sequence as implicit else branch + emptyNode = (org.exist.xquery.parser.XQueryAST) #(#[PARENTHESIZED, "()"]); + #ifExpr.addChild(emptyNode); + } + ) ; // === Logical === @@ -1037,6 +1390,12 @@ ifExpr throws XPathException orExpr throws XPathException : andExpr ( "or"^ andExpr )* + ( + { xq4Enabled }? DOUBLE_QUESTION! exprSingle DOUBLE_BANG! exprSingle + { + #orExpr = #(#[TERNARY, "ternary"], #orExpr); + } + )? ; andExpr throws XPathException @@ -1061,23 +1420,33 @@ castableExpr throws XPathException castExpr throws XPathException : - arrowExpr ( "cast"^ "as"! singleType )? + pipelineExpr ( "cast"^ "as"! singleType )? + ; + +pipelineExpr throws XPathException +: + arrowExpr ( { xq4Enabled }? PIPELINE_OP^ arrowExpr )* ; comparisonExpr throws XPathException : - r1:stringConcatExpr ( - ( BEFORE ) => BEFORE^ stringConcatExpr + r1:otherwiseExpr ( + ( BEFORE ) => BEFORE^ otherwiseExpr | - ( AFTER ) => AFTER^ stringConcatExpr - | ( ( "eq"^ | "ne"^ | "lt"^ | "le"^ | "gt"^ | "ge"^ ) stringConcatExpr ) + ( AFTER ) => AFTER^ otherwiseExpr + | ( ( "eq"^ | "ne"^ | "lt"^ | "le"^ | "gt"^ | "ge"^ ) otherwiseExpr ) | ( GT EQ ) => GT^ EQ^ r2:rangeExpr { #comparisonExpr = #(#[GTEQ, ">="], #r1, #r2); } - | ( ( EQ^ | NEQ^ | GT^ | LT^ | LTEQ^ ) stringConcatExpr ) - | ( ( "is"^ | "isnot"^ ) stringConcatExpr ) + | ( ( EQ^ | NEQ^ | GT^ | LT^ | LTEQ^ ) otherwiseExpr ) + | ( ( "is"^ | "isnot"^ ) otherwiseExpr ) )? ; +otherwiseExpr throws XPathException +: + stringConcatExpr ( { xq4Enabled }? "otherwise"^ stringConcatExpr )* + ; + stringConcatExpr throws XPathException { boolean isConcat = false; } : @@ -1222,13 +1591,15 @@ stepExpr throws XPathException | ( ( "element" | "attribute" | "text" | "document" | "comment" | "namespace-node" | "processing-instruction" | "namespace" | "ordered" | - "unordered" | "map" | "array" ) LCURLY ) => + "unordered" | "map" | "array" | "fn" | "function" ) LCURLY ) => postfixExpr | ( ( "element" | "attribute" | "processing-instruction" | "namespace" ) eqName LCURLY ) => postfixExpr | + ( "fn" LPAREN ) => postfixExpr + | ( MOD | DOLLAR | ( eqName ( LPAREN | HASH ) ) | SELF | LPAREN | literal | XML_COMMENT | LT | - XML_PI | QUESTION | LPPAREN | STRING_CONSTRUCTOR_START ) + XML_PI | QUESTION | LPPAREN | STRING_CONSTRUCTOR_START | STRING_TEMPLATE_START | LCURLY | HASH ) => postfixExpr | axisStep @@ -1272,6 +1643,7 @@ forwardAxisSpecifier : "child" | "self" | "attribute" | "descendant" | "descendant-or-self" | "following-sibling" | "following" + | "following-or-self" | "following-sibling-or-self" ; reverseAxis : reverseAxisSpecifier COLON! COLON! ; @@ -1279,6 +1651,7 @@ reverseAxis : reverseAxisSpecifier COLON! COLON! ; reverseAxisSpecifier : "parent" | "ancestor" | "ancestor-or-self" | "preceding-sibling" | "preceding" + | "preceding-or-self" | "preceding-sibling-or-self" ; nodeTest throws XPathException @@ -1326,18 +1699,42 @@ postfixExpr throws XPathException | (LPAREN) => dynamicFunCall | + // XQ4: ?[ must come before ? lookup to disambiguate + (QUESTION LPPAREN) => filterExprAM + | (QUESTION) => lookup )* ; arrowExpr throws XPathException : - unaryExpr ( ARROW_OP^ arrowFunctionSpecifier argumentList )* + unaryExpr ( + ARROW_OP^ arrowFunctionSpecifier argumentList + | + { xq4Enabled }? MAPPING_ARROW_OP^ arrowFunctionSpecifier argumentList + | + { xq4Enabled }? METHOD_CALL_OP^ NCNAME argumentList + )* ; arrowFunctionSpecifier throws XPathException { String name= null; } : + // XQ4: inline/focus function expression + ( MOD | ( ("function" | "fn") (LPAREN | LCURLY) ) ) => inlineOrFocusFunctionExpr + | + // XQ4: named function reference (eqName '#' arity) + ( eqName HASH ) => namedFunctionRef + | + // XQ4: map constructor as function + ( "map" LCURLY ) => mapConstructor + | + // XQ4: bare map constructor as function + ( LCURLY ) => bareMapConstructor + | + // XQ4: array constructor as function + ( LPPAREN | ("array" LCURLY) ) => arrayConstructor + | name=n:eqName { #arrowFunctionSpecifier= #[EQNAME, name]; @@ -1349,8 +1746,17 @@ arrowFunctionSpecifier throws XPathException varRef ; +filterExprAM throws XPathException +: + q:QUESTION! LPPAREN! expr RPPAREN! + { + #filterExprAM = #(#[FILTER_AM, "filter-am"], #filterExprAM); + #filterExprAM.copyLexInfo(#q); + } + ; + lookup throws XPathException -{ String name= null; } +{ String name= null; String varName= null; } : q:QUESTION! ( @@ -1360,18 +1766,59 @@ lookup throws XPathException #lookup.copyLexInfo(#q); } | + // XQ4: decimal and double literals as key selectors (?1.2, ?1.2e0) + dbl:DOUBLE_LITERAL + { + #lookup = #(#[LOOKUP, "?"], #dbl); + #lookup.copyLexInfo(#q); + } + | + dec:DECIMAL_LITERAL + { + #lookup = #(#[LOOKUP, "?"], #dec); + #lookup.copyLexInfo(#q); + } + | pos:INTEGER_LITERAL { #lookup = #(#[LOOKUP, "?"], #pos); #lookup.copyLexInfo(#q); } | + // XQ4: string literal as key selector (?"first value") + str:STRING_LITERAL + { + #lookup = #(#[LOOKUP, "?"], #str); + #lookup.copyLexInfo(#q); + } + | paren:parenthesizedExpr { #lookup = #(#[LOOKUP, "?"], #paren); #lookup.copyLexInfo(#q); } | + // XQ4: variable reference as key selector (?$var) + DOLLAR! varName=v:varName + { + #lookup = #(#[LOOKUP, "?"], #[VARIABLE_REF, varName]); + #lookup.copyLexInfo(#q); + } + | + // XQ4: context item as key selector (?.) + dot:SELF + { + #lookup = #(#[LOOKUP, "?"], #dot); + #lookup.copyLexInfo(#q); + } + | + // XQ4: QName literal as key selector (?#name) + qnl:qnameLiteral + { + #lookup = #(#[LOOKUP, "?"], #qnl); + #lookup.copyLexInfo(#q); + } + | STAR { #lookup = #(#[LOOKUP, "?*"]); @@ -1423,9 +1870,18 @@ primaryExpr throws XPathException | ( "map" LCURLY ) => mapConstructor | + ( LCURLY RCURLY ) => bareMapConstructor + | + ( LCURLY exprSingle COLON ) => bareMapConstructor + | directConstructor | - ( MOD | "function" LPAREN | eqName HASH ) => functionItemExpr + ( { xq4Enabled }? ( "fn" | "function" ) LCURLY ) => focusFunctionExpr + | + // XQ4: QName literal (#local, #prefix:local, #Q{uri}local) + ( { xq4Enabled }? HASH ) => qnameLiteral + | + ( MOD | ( "fn" | "function" ) LPAREN | eqName HASH ) => functionItemExpr | ( eqName LPAREN ) => functionCall | @@ -1433,6 +1889,8 @@ primaryExpr throws XPathException | ( STRING_CONSTRUCTOR_START ) => stringConstructor | + ( { xq4Enabled }? STRING_TEMPLATE_START ) => stringTemplate + | contextItemExpr | parenthesizedExpr @@ -1459,10 +1917,32 @@ stringConstructorContent throws XPathException stringConstructorInterpolation throws XPathException : STRING_CONSTRUCTOR_INTERPOLATION_START^ - { lexer.inStringConstructor = false; } + { lexer.inStringConstructor = false; lexer.stringConstructorInterpolationDepth++; } ( expr )? STRING_CONSTRUCTOR_INTERPOLATION_END! - { lexer.inStringConstructor = true; } + { lexer.stringConstructorInterpolationDepth--; lexer.inStringConstructor = true; } + ; + +stringTemplate throws XPathException +: + st:STRING_TEMPLATE_START! + { lexer.inStringTemplate = true; } + ( STRING_TEMPLATE_CONTENT | stringTemplateInterpolation )* + STRING_TEMPLATE_END! + { lexer.inStringTemplate = false; } + { + #stringTemplate = #(#[STRING_TEMPLATE, null], #stringTemplate); + #stringTemplate.copyLexInfo(#st); + } + ; + +stringTemplateInterpolation throws XPathException +: + lc:LCURLY! + { lexer.inStringTemplate = false; lexer.stringTemplateDepth++; } + ( expr )? + RCURLY! + { lexer.stringTemplateDepth--; lexer.inStringTemplate = true; } ; mapConstructor throws XPathException @@ -1474,6 +1954,15 @@ mapConstructor throws XPathException } ; +bareMapConstructor throws XPathException +: + lc:LCURLY! ( mapAssignment ( COMMA! mapAssignment )* )? RCURLY! + { + #bareMapConstructor = #(#[MAP, "map"], #bareMapConstructor); + #bareMapConstructor.copyLexInfo(#lc); + } + ; + mapAssignment throws XPathException : (exprSingle COLON! EQ!) => exprSingle COLON^ eq:EQ^ exprSingle @@ -1525,6 +2014,16 @@ literal STRING_LITERAL^ | numericLiteral ; +qnameLiteral throws XPathException +{ String name = null; } +: + h:HASH! name=eqName + { + #qnameLiteral = #(#[QNAME_LITERAL, name]); + #qnameLiteral.copyLexInfo(#h); + } + ; + numericLiteral : DOUBLE_LITERAL^ | DECIMAL_LITERAL^ | INTEGER_LITERAL^ @@ -1539,7 +2038,7 @@ parenthesizedExpr throws XPathException functionItemExpr throws XPathException : - ( MOD | "function" ) => inlineFunctionExpr + ( MOD | "function" | "fn" ) => inlineOrFocusFunctionExpr | namedFunctionRef ; @@ -1553,24 +2052,36 @@ namedFunctionRef throws XPathException } ; -inlineFunctionExpr throws XPathException +inlineOrFocusFunctionExpr throws XPathException : - ann:annotations! "function"! lp:LPAREN! ( paramList )? - RPAREN! ( returnType )? - functionBody + ann:annotations! ( "function"! | "fn"! ) + ( + (LPAREN) => lp:LPAREN! ( paramList )? + RPAREN! ( returnType )? + functionBody + { + #inlineOrFocusFunctionExpr = #(#[INLINE_FUNCTION_DECL, null], #ann, #inlineOrFocusFunctionExpr); + #inlineOrFocusFunctionExpr.copyLexInfo(#lp); + } + | + lc:LCURLY! ( expr )? RCURLY! + { + #inlineOrFocusFunctionExpr = #(#[FOCUS_FUNCTION, null], #inlineOrFocusFunctionExpr); + #inlineOrFocusFunctionExpr.copyLexInfo(#lc); + } + ) + exception catch [RecognitionException e] { - #inlineFunctionExpr = #(#[INLINE_FUNCTION_DECL, null], null, #inlineFunctionExpr); - #inlineFunctionExpr.copyLexInfo(#lp); + throw new XPathException(e.getLine(), e.getColumn(), ErrorCodes.XPST0003, "Syntax error within inline function: " + e.getMessage()); } - exception catch [RecognitionException e] + ; + +focusFunctionExpr throws XPathException +: + ( "fn"! | "function"! ) lc:LCURLY! ( expr )? RCURLY! { - if (#lp == null) { - throw new XPathException(e.getLine(), e.getColumn(), ErrorCodes.XPST0003, "Syntax error within inline function: " + e.getMessage()); - } else { - #lp.setLine(e.getLine()); - #lp.setColumn(e.getColumn()); - throw new XPathException(#lp, ErrorCodes.XPST0003, "Syntax error within user defined function: " + e.getMessage()); - } + #focusFunctionExpr = #(#[FOCUS_FUNCTION, null], #focusFunctionExpr); + #focusFunctionExpr.copyLexInfo(#lc); } ; @@ -1595,8 +2106,34 @@ argumentList throws XPathException argument throws XPathException : - (QUESTION! ( NCNAME | INTEGER_LITERAL | LPAREN | STAR )) => lookup + (QUESTION ( ncnameOrKeyword | INTEGER_LITERAL | DECIMAL_LITERAL | DOUBLE_LITERAL | STRING_LITERAL | LPAREN | DOLLAR | SELF | HASH | STAR )) => unaryLookup | argumentPlaceholder + | ( { xq4Enabled }? ncnameOrKeyword COLON ( EQ | ncnameOrKeyword COLON EQ ) ) => keywordArgument + | exprSingle + ; + +// XQ4: keyword arguments - name := value, or prefix:name := value +keywordArgument throws XPathException +{ String kwName = null; String prefix = null; String local = null; } +: + // Prefixed keyword: prefix:name := value + ( ( ncnameOrKeyword COLON ncnameOrKeyword COLON EQ ) => + prefix=ncnameOrKeyword! COLON! local=ncnameOrKeyword! COLON! EQ! keywordArgumentValue + { kwName = prefix + ":" + local; } + | + // Simple keyword: name := value + kwName=ncnameOrKeyword! COLON! EQ! keywordArgumentValue + ) + { + #keywordArgument = #(#[KEYWORD_ARG, kwName], #keywordArgument); + } + ; + +// XQ4: keyword argument value can be an expression or argument placeholder (?) +// Use lookahead to distinguish bare ? (placeholder) from ?key (unary lookup) +keywordArgumentValue throws XPathException +: + ( QUESTION ( RPAREN | COMMA ) ) => argumentPlaceholder | exprSingle ; @@ -1606,7 +2143,7 @@ contextItemExpr : SELF ; kindTest : - textTest | anyKindTest | elementTest | attributeTest | + textTest | anyKindTest | gnodeTest | elementTest | attributeTest | commentTest | namespaceNodeTest | piTest | documentTest ; @@ -1620,6 +2157,13 @@ anyKindTest "node"^ LPAREN! RPAREN! ; +// XQ4: gnode() is a synonym for node() +gnodeTest +: + "gnode"! LPAREN! RPAREN! + { #gnodeTest = #[LITERAL_node, "node"]; } + ; + elementTest : "element"^ LPAREN! @@ -2074,8 +2618,23 @@ ncnameOrKeyword returns [String name] name=reservedKeywords ; +/** + * Top-level dispatcher for reserved keywords usable as NCNames. + * Split into feature-area sub-rules to reduce merge conflicts on the + * next integration branch. Each feature branch owns its sub-rule; + * merging adds a single alternative here instead of interleaving 80+ lines. + */ reservedKeywords returns [String name] { name= null; } +: + name=coreReservedKeywords + | + name=xq4Keywords + ; + +// ---- Core reserved keywords (XQuery 3.1 + eXist-db extensions) ---- +coreReservedKeywords returns [String name] +{ name= null; } : "element" { name = "element"; } | @@ -2125,6 +2684,14 @@ reservedKeywords returns [String name] | "preceding" { name = "preceding"; } | + "following-or-self" { name = "following-or-self"; } + | + "preceding-or-self" { name = "preceding-or-self"; } + | + "following-sibling-or-self" { name = "following-sibling-or-self"; } + | + "preceding-sibling-or-self" { name = "preceding-sibling-or-self"; } + | "item" { name= "item"; } | "empty" { name= "empty"; } @@ -2137,8 +2704,8 @@ reservedKeywords returns [String name] | "namespace-node" { name= "namespace-node"; } | - "namespace" { name= "namespace"; } - | + "namespace" { name= "namespace"; } + | "if" { name= "if"; } | "then" { name= "then"; } @@ -2177,8 +2744,8 @@ reservedKeywords returns [String name] | "by" { name = "by"; } | - "group" { name = "group"; } - | + "group" { name = "group"; } + | "some" { name = "some"; } | "every" { name = "every"; } @@ -2289,7 +2856,7 @@ reservedKeywords returns [String name] | "tumbling" { name = "tumbling"; } | - "sliding" { name = "sliding"; } + "sliding" { name = "sliding"; } | "window" { name = "window"; } | @@ -2306,6 +2873,27 @@ reservedKeywords returns [String name] "when" { name = "when"; } ; +// ---- XQuery 4.0 keywords (feature/xquery-4.0-parser) ---- +xq4Keywords returns [String name] +{ name= null; } +: + "fn" { name = "fn"; } + | + "member" { name = "member"; } + | + "otherwise" { name = "otherwise"; } + | + "key" { name = "key"; } + | + "while" { name = "while"; } + | + "finally" { name = "finally"; } + | + "record" { name = "record"; } + | + "gnode" { name = "gnode"; } + ; + /** * The XQuery/XPath lexical analyzer. @@ -2324,6 +2912,9 @@ options { protected boolean wsExplicit= false; protected boolean parseStringLiterals= true; protected boolean inStringConstructor = false; + protected boolean inStringTemplate = false; + protected int stringTemplateDepth = 0; + protected int stringConstructorInterpolationDepth = 0; protected boolean inElementContent= false; protected boolean inAttributeContent= false; protected boolean inFunctionBody= false; @@ -2352,11 +2943,35 @@ options { newline(); } } + + /** + * Disambiguate (# as pragma vs ( + #QName literal. + * Scans past (# and the QName. Returns true (pragma) if the QName + * is followed by whitespace or #). Returns false (QName literal) + * if followed by , or ). + */ + private boolean isPragmaContext() throws CharStreamException { + // LA(1)='(' LA(2)='#' -- start scanning from LA(3) + int i = 3; + // Skip the QName (letters, digits, -, ., _, :) + while (Character.isLetterOrDigit(LA(i)) || LA(i) == '-' || LA(i) == '.' || LA(i) == '_' || LA(i) == ':') { + i++; + } + char afterQName = LA(i); + // If followed by , or ) it's a QName literal argument + if (afterQName == ',' || afterQName == ')') { + return false; + } + // Otherwise it's a pragma (whitespace, #), or other pragma content) + return true; + } } protected SLASH options { paraphrase="single slash '/'"; }: '/' ; protected DSLASH options { paraphrase="double slash '//'"; }: '/' '/' ; protected BANG : '!' ; +protected DOUBLE_BANG options { paraphrase="double bang '!!'"; }: '!' '!' ; +protected DOUBLE_QUESTION options { paraphrase="double question '??'"; }: '?' '?' ; protected MOD : '%' ; protected COLON : ':' ; protected COMMA : ',' ; @@ -2374,7 +2989,10 @@ protected SELF options { paraphrase="."; }: '.' ; protected PARENT options { paraphrase=".."; }: ".." ; protected UNION options { paraphrase="union"; }: '|' ; protected CONCAT options { paraphrase="||"; }: '|' '|'; +protected METHOD_CALL_OP options { paraphrase="method call operator"; }: '=' '?' '>'; +protected MAPPING_ARROW_OP options { paraphrase="mapping arrow operator"; }: '=' '!' '>'; protected ARROW_OP options { paraphrase="arrow operator"; }: '=' '>'; +protected PIPELINE_OP options { paraphrase="pipeline operator"; }: '-' '>'; protected AT options { paraphrase="@ char"; }: '@' ; protected DOLLAR options { paraphrase="dollar sign '$'"; }: '$' ; protected EQ options { paraphrase="="; }: '=' ; @@ -2408,12 +3026,17 @@ protected LETTER protected DIGITS : - ( DIGIT )+ + ( DIGIT )+ ( '_' ( DIGIT )+ )* ; protected HEX_DIGITS : - ( '0'..'9' | 'a'..'f' | 'A'..'F' )+ + ( '0'..'9' | 'a'..'f' | 'A'..'F' )+ ( '_' ( '0'..'9' | 'a'..'f' | 'A'..'F' )+ )* + ; + +protected BINARY_DIGITS +: + ( '0' | '1' )+ ( '_' ( '0' | '1' )+ )* ; protected NCNAME @@ -2470,16 +3093,26 @@ protected INTEGER_LITERAL { !(inElementContent || inAttributeContent) }? DIGITS ; +protected HEX_INTEGER_LITERAL +: + { !(inElementContent || inAttributeContent) }? '0' ('x' | 'X') HEX_DIGITS + ; + +protected BINARY_INTEGER_LITERAL +: + { !(inElementContent || inAttributeContent) }? '0' ('b' | 'B') BINARY_DIGITS + ; + protected DOUBLE_LITERAL : { !(inElementContent || inAttributeContent) }? - ( ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGIT )* )? ) ) ( 'e' | 'E' ) ( '+' | '-' )? DIGITS + ( ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGITS )? )? ) ) ( 'e' | 'E' ) ( '+' | '-' )? DIGITS ; protected DECIMAL_LITERAL : { !(inElementContent || inAttributeContent) }? - ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGIT )* )? ) + ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGITS )? )? ) ; protected PREDEFINED_ENTITY_REF @@ -2520,7 +3153,6 @@ options { : ( ( '\n' ) => '\n' { newline(); } | - ( '&' ) => ( PREDEFINED_ENTITY_REF | CHAR_REF ) | ( ( ']' '`' ) ~ ( '`' ) ) => ( ']' '`' ) | ( ']' ~ ( '`' ) ) => ']' | ( '`' ~ ( '{') ) => '`' | @@ -2528,6 +3160,21 @@ options { )+ ; +protected STRING_TEMPLATE_START options { paraphrase="start of string template"; }: '`'; +protected STRING_TEMPLATE_END options { paraphrase="end of string template"; }: '`'; + +protected STRING_TEMPLATE_CONTENT +options { + testLiterals = false; + paraphrase = "string template content"; +} +: + ( + '\n' { newline(); } | + ~ ( '\n' | '{' | '}' | '`') + )+ + ; + protected BRACED_URI_LITERAL options { paraphrase="braced uri literal"; @@ -2641,6 +3288,46 @@ options { testLiterals = false; } : + { inStringTemplate }? + ( '`' '`' ) => '`' '`' { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { inStringTemplate }? + ( '{' '{' ) => '{' '{' { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { inStringTemplate }? + ( '}' '}' ) => '}' '}' { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { inStringTemplate }? + STRING_TEMPLATE_END { + $setType(STRING_TEMPLATE_END); + } + | + { inStringTemplate }? + LCURLY { + $setType(LCURLY); + } + | + { inStringTemplate }? + STRING_TEMPLATE_CONTENT { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { !inStringConstructor && !inStringTemplate }? + ( '`' '`' '[' ) => STRING_CONSTRUCTOR_START { + $setType(STRING_CONSTRUCTOR_START); + } + | + { !inStringConstructor && !inStringTemplate }? + STRING_TEMPLATE_START { + $setType(STRING_TEMPLATE_START); + } + | { !inStringConstructor }? STRING_CONSTRUCTOR_START { $setType(STRING_CONSTRUCTOR_START); @@ -2656,7 +3343,7 @@ options { $setType(STRING_CONSTRUCTOR_INTERPOLATION_START); } | - { !inStringConstructor }? + { !inStringConstructor && stringTemplateDepth == 0 && stringConstructorInterpolationDepth > 0 }? STRING_CONSTRUCTOR_INTERPOLATION_END { $setType(STRING_CONSTRUCTOR_INTERPOLATION_END); } @@ -2777,7 +3464,7 @@ options { ( NAME_START_CHAR ) => ncname:NCNAME { $setType(ncname.getType()); } | - { parseStringLiterals && !inElementContent && !inStringConstructor }? + { parseStringLiterals && !inElementContent && !inStringConstructor && !inStringTemplate }? STRING_LITERAL { $setType(STRING_LITERAL); } | BRACED_URI_LITERAL { $setType(BRACED_URI_LITERAL); } @@ -2801,7 +3488,15 @@ options { ( '.' ) => SELF { $setType(SELF); } | - ( INTEGER_LITERAL ( '.' ( INTEGER_LITERAL )? )? ( 'e' | 'E' ) ) + // XQ4: hex integer literals (0xFF, 0xCAFE_BABE) + ( '0' ('x' | 'X') ) + => HEX_INTEGER_LITERAL { $setType(INTEGER_LITERAL); } + | + // XQ4: binary integer literals (0b1010, 0b1111_0000) + ( '0' ('b' | 'B') ) + => BINARY_INTEGER_LITERAL { $setType(INTEGER_LITERAL); } + | + ( INTEGER_LITERAL ( '.' ( DIGITS )? )? ( 'e' | 'E' ) ) => DOUBLE_LITERAL { $setType(DOUBLE_LITERAL); } | @@ -2816,6 +3511,8 @@ options { { !(inAttributeContent || inElementContent) }? DSLASH { $setType(DSLASH); } | + ( DOUBLE_BANG ) => DOUBLE_BANG { $setType(DOUBLE_BANG); } + | BANG { $setType(BANG); } | COLON { $setType(COLON); } @@ -2828,10 +3525,17 @@ options { | STAR { $setType(STAR); } | + // XQ4: Unicode multiplication sign (U+00D7) as alternative to * + '\u00D7' { $setType(STAR); } + | + ( DOUBLE_QUESTION ) => DOUBLE_QUESTION { $setType(DOUBLE_QUESTION); } + | QUESTION { $setType(QUESTION); } | PLUS { $setType(PLUS); } | + ( PIPELINE_OP ) => PIPELINE_OP { $setType(PIPELINE_OP); } + | MINUS { $setType(MINUS); } | LPPAREN { $setType(LPPAREN); } @@ -2846,6 +3550,10 @@ options { | DOLLAR { $setType(DOLLAR); } | + ( METHOD_CALL_OP ) => METHOD_CALL_OP { $setType(METHOD_CALL_OP); } + | + ( MAPPING_ARROW_OP ) => MAPPING_ARROW_OP { $setType(MAPPING_ARROW_OP); } + | ARROW_OP { $setType(ARROW_OP); } | EQ { $setType(EQ); } @@ -2863,6 +3571,7 @@ options { | XML_CDATA_END { $setType(XML_CDATA_END); } | + { LA(1) == '(' && LA(2) == '#' && isPragmaContext() }? PRAGMA_START { $setType(PRAGMA_START); diff --git a/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g b/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g index 20308296806..45ca178fd5e 100644 --- a/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g +++ b/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g @@ -139,6 +139,11 @@ options { List windowConditions = null; WindowExpr.WindowType windowType = null; boolean allowEmpty = false; + QName valueVarName = null; + SequenceType valueSequenceType = null; + // XQ4 destructuring + List destructureVarNames = null; + List destructureVarTypes = null; } /** @@ -267,14 +272,20 @@ throws PermissionDeniedException, EXistException, XPathException v:VERSION_DECL { final String version = v.getText(); - if (version.equals("3.1")) { + if (version.equals("4.0")) { + context.setXQueryVersion(40); + staticContext.setXQueryVersion(40); + } else if (version.equals("3.1")) { context.setXQueryVersion(31); + staticContext.setXQueryVersion(31); } else if (version.equals("3.0")) { context.setXQueryVersion(30); + staticContext.setXQueryVersion(30); } else if (version.equals("1.0")) { context.setXQueryVersion(10); + staticContext.setXQueryVersion(10); } else { - throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0 or 3.1"); + throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0, 3.1 or 4.0"); } } ( enc:STRING_LITERAL )? @@ -828,7 +839,13 @@ throws PermissionDeniedException, EXistException, XPathException { QName qn= null; try { - qn = QName.parse(staticContext, name.getText(), staticContext.getDefaultFunctionNamespace()); + // XQ4 (PR2200): unprefixed function declarations go into "no namespace" + // instead of the default function namespace (fn:) + if (name.getText() != null && !name.getText().contains(":") && staticContext.getXQueryVersion() >= 40) { + qn = new QName(name.getText(), ""); + } else { + qn = QName.parse(staticContext, name.getText(), staticContext.getDefaultFunctionNamespace()); + } } catch (final IllegalQNameException iqe) { throw new XPathException(name.getLine(), name.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + name.getText()); } @@ -930,11 +947,42 @@ throws PermissionDeniedException, EXistException, XPathException ) ; +focusFunctionDecl [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ step = null; }: + #( + ff:FOCUS_FUNCTION + { + PathExpr body = new PathExpr(context); + body.setASTNode(focusFunctionDecl_AST_in); + + // Create a function with a single implicit parameter + FunctionSignature signature = new FunctionSignature(InlineFunction.INLINE_FUNCTION_QNAME); + UserDefinedFunction func = new UserDefinedFunction(context, signature); + func.setASTNode(ff); + + // Add the implicit focus parameter: $(.focus) as item()* + FunctionParameterSequenceType focusParam = new FunctionParameterSequenceType( + FocusFunction.FOCUS_PARAM_NAME, Type.ITEM, Cardinality.ZERO_OR_MORE, + "implicit focus parameter"); + signature.setArgumentTypes(new SequenceType[] { focusParam }); + signature.setReturnType(new SequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE)); + func.addVariable(FocusFunction.FOCUS_PARAM_NAME); + } + ( expr [body] )? + { + func.setFunctionBody(body); + step = new FocusFunction(context, func); + } + ) + ; + /** * Parse params in function declaration. */ paramList [List vars] -throws XPathException +throws PermissionDeniedException, EXistException, XPathException : param [vars] ( param [vars] )* ; @@ -943,7 +991,7 @@ throws XPathException * Single function param. */ param [List vars] -throws XPathException +throws PermissionDeniedException, EXistException, XPathException : #( varname:VARIABLE_BINDING @@ -959,6 +1007,18 @@ throws XPathException sequenceType [var] ) )? + ( + #( + PARAM_DEFAULT + { + PathExpr defaultExpr = new PathExpr(context); + } + expr [defaultExpr] + { + var.setDefaultValue(defaultExpr.simplify()); + } + ) + )? ) ; @@ -1132,6 +1192,38 @@ throws XPathException ) ) | + #( + RECORD_TEST { type.setPrimaryType(Type.RECORD); } + ( + STAR + { type.setRecordExtensible(true); } + | + ( + ( + #( + rf:RECORD_FIELD + { + final String fieldName = rf.getText(); + boolean optional = false; + SequenceType fieldType = null; + } + ( QUESTION { optional = true; } )? + ( + { fieldType = new SequenceType(); } + sequenceType [fieldType] + )? + { + type.addRecordField(new SequenceType.RecordField( + fieldName, optional, fieldType)); + } + ) + | + STAR { type.setRecordExtensible(true); } + )* + ) + )? + ) + | #( "item" { type.setPrimaryType(Type.ITEM); } ) @@ -1262,6 +1354,37 @@ throws XPathException #( "schema-element" EQNAME ) )? ) + | + #( + CHOICE_TYPE + { + List alternatives = new ArrayList(); + } + ( + { + SequenceType altType = new SequenceType(); + } + sequenceType [altType] + { + alternatives.add(altType); + } + )+ + { + for (final SequenceType alt : alternatives) { + type.addChoiceAlternative(alt); + } + type.setPrimaryType(Type.ITEM); + } + ) + | + #( + en:ENUM_TYPE + { + String enumText = en.getText(); + String[] enumVals = enumText.split(",", -1); + type.setEnumValues(enumVals); + } + ) ) ( STAR { type.setCardinality(Cardinality.ZERO_OR_MORE); } @@ -1293,6 +1416,14 @@ throws PermissionDeniedException, EXistException, XPathException | step=arrowOp [path] | + step=mappingArrowOp [path] + | + step=pipelineOp [path] + | + step=methodCallOp [path] // XQ4 method call operator =?> + | + step=otherwiseExpr [path] + | step=typeCastExpr [path] | // sequence constructor: @@ -1363,301 +1494,1047 @@ throws PermissionDeniedException, EXistException, XPathException } ) | - // conditional: + step=exprFlowControl [path] + | + // treat as: #( - astIf:"if" + "treat" { - PathExpr testExpr= new PathExpr(context); - PathExpr thenExpr= new PathExpr(context); - PathExpr elseExpr= new PathExpr(context); + PathExpr expr = new PathExpr(context); + expr.setASTNode(expr_AST_in); + SequenceType type= new SequenceType(); } - step=expr [testExpr] - step=astThen:expr [thenExpr] - step=astElse:expr [elseExpr] + step=expr [expr] + sequenceType [type] { - thenExpr.setASTNode(astThen); - elseExpr.setASTNode(astElse); - ConditionalExpression cond = - new ConditionalExpression(context, testExpr, thenExpr, - new DebuggableExpression(elseExpr)); - cond.setASTNode(astIf); - path.add(cond); - step = cond; + step = new TreatAsExpression(context, expr, type); + step.setASTNode(expr_AST_in); + path.add(step); } ) | - // quantified expression: some + // switch #( - "some" + switchAST:"switch" { - List clauses= new ArrayList(); - PathExpr satisfiesExpr = new PathExpr(context); - satisfiesExpr.setASTNode(expr_AST_in); + PathExpr operand = new PathExpr(context); + operand.setASTNode(expr_AST_in); + boolean booleanMode = false; } ( - #( - someVarName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - PathExpr inputSequence = new PathExpr(context); - inputSequence.setASTNode(expr_AST_in); - } - ( - #( - "as" - { SequenceType type= new SequenceType(); } - sequenceType[type] - ) - { clause.sequenceType = type; } - )? - step=expr[inputSequence] - { - try { - clause.varName = QName.parse(staticContext, someVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(someVarName.getLine(), someVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + someVarName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - )* - step=expr[satisfiesExpr] - { - Expression action = satisfiesExpr; - for (int i= clauses.size() - 1; i >= 0; i--) { - ForLetClause clause= (ForLetClause) clauses.get(i); - BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.SOME); - expr.setASTNode(expr_AST_in); - expr.setVariable(clause.varName); - expr.setSequenceType(clause.sequenceType); - expr.setInputSequence(clause.inputSequence); - expr.setReturnExpression(action); - satisfiesExpr= null; - action= expr; - } - path.add(action); - step = action; - } - ) - | - // quantified expression: every - #( - "every" + SWITCH_BOOLEAN + { booleanMode = true; } + | + step=expr [operand] + ) { - List clauses= new ArrayList(); - PathExpr satisfiesExpr = new PathExpr(context); - satisfiesExpr.setASTNode(expr_AST_in); + SwitchExpression switchExpr = new SwitchExpression(context, operand); + switchExpr.setBooleanMode(booleanMode); + switchExpr.setASTNode(switchAST); + path.add(switchExpr); } ( - #( - everyVarName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - PathExpr inputSequence = new PathExpr(context); - inputSequence.setASTNode(expr_AST_in); - } - ( - #( - "as" - { SequenceType type= new SequenceType(); } - sequenceType[type] - ) - { clause.sequenceType = type; } - )? - step=expr[inputSequence] - { - try { - clause.varName = QName.parse(staticContext, everyVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(everyVarName.getLine(), everyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + everyVarName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - )* - step=expr[satisfiesExpr] - { - Expression action = satisfiesExpr; - for (int i= clauses.size() - 1; i >= 0; i--) { - ForLetClause clause= (ForLetClause) clauses.get(i); - BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.EVERY); - expr.setASTNode(expr_AST_in); - expr.setVariable(clause.varName); - expr.setSequenceType(clause.sequenceType); - expr.setInputSequence(clause.inputSequence); - expr.setReturnExpression(action); - satisfiesExpr= null; - action= expr; + { + List caseOperands = new ArrayList(2); + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); } - path.add(action); - step = action; - } + (( + { + PathExpr caseOperand = new PathExpr(context); + caseOperand.setASTNode(expr_AST_in); + } + "case" + expr [caseOperand] + { caseOperands.add(caseOperand); } + )+ + #( + "return" + step= expr [returnExpr] + { switchExpr.addCase(caseOperands, returnExpr); } + )) + )+ + ( + "default" + { + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); + } + step=expr [returnExpr] + { + switchExpr.setDefault(returnExpr); + } + ) + { step = switchExpr; } ) | - //try/catch expression + // typeswitch #( - astTry:"try" + "typeswitch" { - PathExpr tryTargetExpr = new PathExpr(context); - tryTargetExpr.setASTNode(expr_AST_in); + PathExpr operand = new PathExpr(context); + operand.setASTNode(expr_AST_in); } - step=expr [tryTargetExpr] + step=expr [operand] { - TryCatchExpression cond = new TryCatchExpression(context, tryTargetExpr); - cond.setASTNode(astTry); - path.add(cond); + TypeswitchExpression tswitch = new TypeswitchExpression(context, operand); + tswitch.setASTNode(expr_AST_in); + path.add(tswitch); } ( { - final List catchErrorList = new ArrayList<>(2); - final List catchVars = new ArrayList<>(3); - final PathExpr catchExpr = new PathExpr(context); - catchExpr.setASTNode(expr_AST_in); + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); + QName qn = null; + List types = new ArrayList(2); + SequenceType type = new SequenceType(); } #( - astCatch:"catch" - (catchErrorList [catchErrorList]) + "case" ( - { - QName qncode = null; - QName qndesc = null; - QName qnval = null; - } - code:CATCH_ERROR_CODE + var:VARIABLE_BINDING { try { - qncode = QName.parse(staticContext, code.getText()); - catchVars.add(qncode); + qn = QName.parse(staticContext, var.getText()); } catch (final IllegalQNameException iqe) { - throw new XPathException(code.getLine(), code.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + code.getText()); + throw new XPathException(var.getLine(), var.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + var.getText()); } } - ( - desc:CATCH_ERROR_DESC - { - try { - qndesc = QName.parse(staticContext, desc.getText()); - catchVars.add(qndesc); - } catch (final IllegalQNameException iqe) { - throw new XPathException(desc.getLine(), desc.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + desc.getText()); - } - } - - ( - val:CATCH_ERROR_VAL - { - try { - qnval = QName.parse(staticContext, val.getText()); - catchVars.add(qnval); - } catch (final IllegalQNameException iqe) { - throw new XPathException(val.getLine(), val.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + val.getText()); - } - } - - )? - )? )? - step= expr [catchExpr] - { - catchExpr.setASTNode(astCatch); - cond.addCatchClause(catchErrorList, catchVars, catchExpr); - } + ( + sequenceType[type] + { + types.add(type); + type = new SequenceType(); + } + )+ + // Need return as root in following to disambiguate + // e.g. ( case a xs:integer ( * 3 3 ) ) + // which gives xs:integer* and no operator left for 3 3 ... + // Now ( case a xs:integer ( return ( + 3 3 ) ) ) /ljo + #( + "return" + step= expr [returnExpr] + { + SequenceType[] atype = new SequenceType[types.size()]; + atype = types.toArray(atype); + tswitch.addCase(atype, qn, returnExpr); + } + ) ) + )+ + ( + "default" + { + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); + QName qn = null; + } + ( + dvar:VARIABLE_BINDING + { + try { + qn = QName.parse(staticContext, dvar.getText()); + } catch (final IllegalQNameException iqe) { + throw new XPathException(dvar.getLine(), dvar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + dvar.getText()); + } + } + )? + step=expr [returnExpr] + { + tswitch.setDefault(qn, returnExpr); + } + ) + { step = tswitch; } + ) + | + // logical operator: or + #( + "or" + { + PathExpr left= new PathExpr(context); + left.setASTNode(expr_AST_in); + } + step=expr [left] + { + PathExpr right= new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [right] + ) + { + OpOr or= new OpOr(context); + or.addPath(left); + or.addPath(right); + path.addPath(or); + step = or; + } + | + // logical operator: and + #( + "and" + { + PathExpr left= new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right= new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [left] + step=expr [right] + ) + { + OpAnd and= new OpAnd(context); + and.addPath(left); + and.addPath(right); + path.addPath(and); + step = and; + } + | + // union expressions: | and union + #( + UNION + { + PathExpr left= new PathExpr(context); + left.setASTNode(expr_AST_in); + PathExpr right= new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [left] + step=expr [right] + ) + { + Union union= new Union(context, left, right); + path.add(union); + step = union; + } + | + // intersections: + #( "intersect" { - step = cond; + PathExpr left = new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right = new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [left] + step=expr [right] + ) + { + Intersect intersect = new Intersect(context, left, right); + path.add(intersect); + step = intersect; + } + | + #( "except" + { + PathExpr left = new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right = new PathExpr(context); + right.setASTNode(expr_AST_in); } + step=expr [left] + step=expr [right] ) + { + Except intersect = new Except(context, left, right); + path.add(intersect); + step = intersect; + } | - // FLWOR expressions: let and for + // absolute path expression starting with a / #( - r:"return" + ABSOLUTE_SLASH { - List clauses= new ArrayList(); - Expression action= new PathExpr(context); - action.setASTNode(r); - PathExpr whereExpr= null; - List orderBy= null; + RootNode root= new RootNode(context); + path.add(root); + } + ( step=expr [path] )? + ) + | + // absolute path expression starting with // + #( + ABSOLUTE_DSLASH + { + RootNode root= new RootNode(context); + path.add(root); } ( - #( - f:"for" - ( - #( - varName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - clause.ast = varName; - PathExpr inputSequence= new PathExpr(context); - inputSequence.setASTNode(expr_AST_in);inputSequence.setASTNode(expr_AST_in); - final DistinctVariableNames distinctVariableNames = new DistinctVariableNames(); - } - ( - #( - "as" - { clause.sequenceType= new SequenceType(); } - sequenceType [clause.sequenceType] - ) - )? - ( - "empty" - { clause.allowEmpty = true; } - )? - ( - posVar:POSITIONAL_VAR - { - try { - clause.posVar = distinctVariableNames.check(ErrorCodes.XQST0089, posVar, QName.parse(staticContext, posVar.getText(), null)); - } catch (final IllegalQNameException iqe) { - throw new XPathException(posVar.getLine(), posVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + posVar.getText()); - } - } - )? - step=expr [inputSequence] - { - try { - clause.varName = distinctVariableNames.check(ErrorCodes.XQST0089, varName, QName.parse(staticContext, varName.getText(), null)); - } catch (final IllegalQNameException iqe) { - throw new XPathException(varName.getLine(), varName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + varName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - )+ - ) - | - #( - l:"let" - ( - #( - letVarName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - clause.ast = letVarName; - clause.type = FLWORClause.ClauseType.LET; - PathExpr inputSequence= new PathExpr(context); - inputSequence.setASTNode(expr_AST_in); - } + step=expr [path] + { + if (step instanceof LocationStep) { + LocationStep s= (LocationStep) step; + if (s.getAxis() == Constants.ATTRIBUTE_AXIS || + (s.getTest().getType() == Type.ATTRIBUTE && s.getAxis() == Constants.CHILD_AXIS)) + // combines descendant-or-self::node()/attribute:* + s.setAxis(Constants.DESCENDANT_ATTRIBUTE_AXIS); + else { + s.setAxis(Constants.DESCENDANT_SELF_AXIS); + s.setAbbreviated(true); + } + } else + step.setPrimaryAxis(Constants.DESCENDANT_SELF_AXIS); + } + )? + ) + | + // range expression: to + #( + "to" + { + PathExpr start= new PathExpr(context); + start.setASTNode(expr_AST_in); + + PathExpr end= new PathExpr(context); + end.setASTNode(expr_AST_in); + + List args= new ArrayList(2); + args.add(start); + args.add(end); + } + step=expr [start] + step=expr [end] + { + RangeExpression range= new RangeExpression(context); + range.setASTNode(expr_AST_in); + range.setArguments(args); + path.addPath(range); + step = range; + } + ) + | + step=generalComp [path] + | + step=valueComp [path] + | + step=nodeComp [path] + | + step=primaryExpr [path] + | + step=pathExpr [path] + | + step=extensionExpr [path] + | + step=numericExpr [path] + | + step=updateExpr [path] + ; + +/** + * Flow control expressions extracted from expr to avoid + * Java method size limit (64KB bytecode). + * Handles: conditional, ternary, quantified (some/every), + * try/catch/finally, FLWOR, instance of. + */ +exprFlowControl [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ step = null; } +: + // conditional: + #( + astIf:"if" + { + PathExpr testExpr= new PathExpr(context); + PathExpr thenExpr= new PathExpr(context); + PathExpr elseExpr= new PathExpr(context); + } + step=expr [testExpr] + step=astThen:expr [thenExpr] + step=astElse:expr [elseExpr] + { + thenExpr.setASTNode(astThen); + elseExpr.setASTNode(astElse); + ConditionalExpression cond = + new ConditionalExpression(context, testExpr, thenExpr, + new DebuggableExpression(elseExpr)); + cond.setASTNode(astIf); + path.add(cond); + step = cond; + } + ) + | + // ternary conditional: condition ?? then !! else + #( + astTernary:TERNARY + { + PathExpr ternTestExpr = new PathExpr(context); + PathExpr ternThenExpr = new PathExpr(context); + PathExpr ternElseExpr = new PathExpr(context); + } + step=expr [ternTestExpr] + step=expr [ternThenExpr] + step=expr [ternElseExpr] + { + ConditionalExpression ternCond = + new ConditionalExpression(context, ternTestExpr, ternThenExpr, + new DebuggableExpression(ternElseExpr)); + ternCond.setASTNode(astTernary); + path.add(ternCond); + step = ternCond; + } + ) + | + // quantified expression: some + #( + "some" + { + List clauses= new ArrayList(); + PathExpr satisfiesExpr = new PathExpr(context); + satisfiesExpr.setASTNode(exprFlowControl_AST_in); + } + ( + #( + someVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + PathExpr inputSequence = new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + } + ( + #( + "as" + { SequenceType type= new SequenceType(); } + sequenceType[type] + ) + { clause.sequenceType = type; } + )? + step=expr[inputSequence] + { + try { + clause.varName = QName.parse(staticContext, someVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(someVarName.getLine(), someVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + someVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + )* + step=expr[satisfiesExpr] + { + Expression action = satisfiesExpr; + for (int i= clauses.size() - 1; i >= 0; i--) { + ForLetClause clause= (ForLetClause) clauses.get(i); + BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.SOME); + expr.setASTNode(exprFlowControl_AST_in); + expr.setVariable(clause.varName); + expr.setSequenceType(clause.sequenceType); + expr.setInputSequence(clause.inputSequence); + expr.setReturnExpression(action); + satisfiesExpr= null; + action= expr; + } + path.add(action); + step = action; + } + ) + | + // quantified expression: every + #( + "every" + { + List clauses= new ArrayList(); + PathExpr satisfiesExpr = new PathExpr(context); + satisfiesExpr.setASTNode(exprFlowControl_AST_in); + } + ( + #( + everyVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + PathExpr inputSequence = new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + } + ( + #( + "as" + { SequenceType type= new SequenceType(); } + sequenceType[type] + ) + { clause.sequenceType = type; } + )? + step=expr[inputSequence] + { + try { + clause.varName = QName.parse(staticContext, everyVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(everyVarName.getLine(), everyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + everyVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + )* + step=expr[satisfiesExpr] + { + Expression action = satisfiesExpr; + for (int i= clauses.size() - 1; i >= 0; i--) { + ForLetClause clause= (ForLetClause) clauses.get(i); + BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.EVERY); + expr.setASTNode(exprFlowControl_AST_in); + expr.setVariable(clause.varName); + expr.setSequenceType(clause.sequenceType); + expr.setInputSequence(clause.inputSequence); + expr.setReturnExpression(action); + satisfiesExpr= null; + action= expr; + } + path.add(action); + step = action; + } + ) + | + //try/catch expression + #( + astTry:"try" + { + PathExpr tryTargetExpr = new PathExpr(context); + tryTargetExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [tryTargetExpr] + { + TryCatchExpression cond = new TryCatchExpression(context, tryTargetExpr); + cond.setASTNode(astTry); + path.add(cond); + } + ( + { + final List catchErrorList = new ArrayList<>(2); + final List catchVars = new ArrayList<>(3); + final PathExpr catchExpr = new PathExpr(context); + catchExpr.setASTNode(exprFlowControl_AST_in); + } + #( + astCatch:"catch" + (catchErrorList [catchErrorList]) + ( + { + QName qncode = null; + QName qndesc = null; + QName qnval = null; + } + code:CATCH_ERROR_CODE + { + try { + qncode = QName.parse(staticContext, code.getText()); + catchVars.add(qncode); + } catch (final IllegalQNameException iqe) { + throw new XPathException(code.getLine(), code.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + code.getText()); + } + } + ( + desc:CATCH_ERROR_DESC + { + try { + qndesc = QName.parse(staticContext, desc.getText()); + catchVars.add(qndesc); + } catch (final IllegalQNameException iqe) { + throw new XPathException(desc.getLine(), desc.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + desc.getText()); + } + } + + ( + val:CATCH_ERROR_VAL + { + try { + qnval = QName.parse(staticContext, val.getText()); + catchVars.add(qnval); + } catch (final IllegalQNameException iqe) { + throw new XPathException(val.getLine(), val.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + val.getText()); + } + } + + )? + )? + )? + step= expr [catchExpr] + { + catchExpr.setASTNode(astCatch); + cond.addCatchClause(catchErrorList, catchVars, catchExpr); + } + ) + )* + ( + #( + astFinally:"finally" + { + final PathExpr finallyExpr = new PathExpr(context); + finallyExpr.setASTNode(astFinally); + } + (step=expr [finallyExpr])? + { + finallyExpr.setASTNode(astFinally); + cond.setFinallyExpr(finallyExpr); + } + ) + )? + + { + step = cond; + } + ) + | + // FLWOR expressions: let and for + #( + r:"return" + { + List clauses= new ArrayList(); + Expression action= new PathExpr(context); + action.setASTNode(r); + PathExpr whereExpr= null; + List orderBy= null; + } + ( + #( + f:"for" + ( + #( + varName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = varName; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in);inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames distinctVariableNames = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + ( + "empty" + { clause.allowEmpty = true; } + )? + ( + posVar:POSITIONAL_VAR + { + try { + clause.posVar = distinctVariableNames.check(ErrorCodes.XQST0089, posVar, QName.parse(staticContext, posVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(posVar.getLine(), posVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + posVar.getText()); + } + } + )? + step=expr [inputSequence] + { + try { + clause.varName = distinctVariableNames.check(ErrorCodes.XQST0089, varName, QName.parse(staticContext, varName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(varName.getLine(), varName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + varName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + | + #( + FOR_MEMBER + #( + memberVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = memberVarName; + clause.type = FLWORClause.ClauseType.FOR_MEMBER; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames memberDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + ( + memberPosVar:POSITIONAL_VAR + { + try { + clause.posVar = memberDistinctVars.check(ErrorCodes.XQST0089, memberPosVar, QName.parse(staticContext, memberPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(memberPosVar.getLine(), memberPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + memberPosVar.getText()); + } + } + )? + step=expr [inputSequence] + { + try { + clause.varName = memberDistinctVars.check(ErrorCodes.XQST0089, memberVarName, QName.parse(staticContext, memberVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(memberVarName.getLine(), memberVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + memberVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + | + #( + FOR_KEY + #( + keyVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = keyVarName; + clause.type = FLWORClause.ClauseType.FOR_KEY; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames keyDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + ( + keyPosVar:POSITIONAL_VAR + { + try { + clause.posVar = keyDistinctVars.check(ErrorCodes.XQST0089, keyPosVar, QName.parse(staticContext, keyPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(keyPosVar.getLine(), keyPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + keyPosVar.getText()); + } + } + )? + step=expr [inputSequence] + { + try { + clause.varName = keyDistinctVars.check(ErrorCodes.XQST0089, keyVarName, QName.parse(staticContext, keyVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(keyVarName.getLine(), keyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + keyVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + | + #( + FOR_VALUE + #( + valueVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = valueVarName; + clause.type = FLWORClause.ClauseType.FOR_VALUE; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames valueDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + ( + valuePosVar:POSITIONAL_VAR + { + try { + clause.posVar = valueDistinctVars.check(ErrorCodes.XQST0089, valuePosVar, QName.parse(staticContext, valuePosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(valuePosVar.getLine(), valuePosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + valuePosVar.getText()); + } + } + )? + step=expr [inputSequence] + { + try { + clause.varName = valueDistinctVars.check(ErrorCodes.XQST0089, valueVarName, QName.parse(staticContext, valueVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(valueVarName.getLine(), valueVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + valueVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + | + #( + FOR_KEY_VALUE + #( + kvKeyVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = kvKeyVarName; + clause.type = FLWORClause.ClauseType.FOR_KEY_VALUE; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames kvDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + ( + #( + kvValueVar:VALUE_VAR + { + try { + clause.valueVarName = kvDistinctVars.check(ErrorCodes.XQST0089, kvValueVar, QName.parse(staticContext, kvValueVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(kvValueVar.getLine(), kvValueVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + kvValueVar.getText()); + } + } + ( + #( + "as" + { clause.valueSequenceType = new SequenceType(); } + sequenceType [clause.valueSequenceType] + ) + )? + ) + )? + ( + kvPosVar:POSITIONAL_VAR + { + try { + clause.posVar = kvDistinctVars.check(ErrorCodes.XQST0089, kvPosVar, QName.parse(staticContext, kvPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(kvPosVar.getLine(), kvPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + kvPosVar.getText()); + } + } + )? + step=expr [inputSequence] + { + try { + clause.varName = kvDistinctVars.check(ErrorCodes.XQST0089, kvKeyVarName, QName.parse(staticContext, kvKeyVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(kvKeyVarName.getLine(), kvKeyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + kvKeyVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + )+ + ) + | + #( + l:"let" + ( + #( + letVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = letVarName; + clause.type = FLWORClause.ClauseType.LET; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + step=expr [inputSequence] + { + try { + clause.varName = QName.parse(staticContext, letVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(letVarName.getLine(), letVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + letVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + | + // XQ4: sequence destructuring + #( + seqDestAST:SEQ_DESTRUCTURE + { + ForLetClause seqClause = new ForLetClause(); + seqClause.ast = seqDestAST; + seqClause.type = FLWORClause.ClauseType.LET_SEQ_DESTRUCTURE; + seqClause.destructureVarNames = new ArrayList(); + seqClause.destructureVarTypes = new ArrayList(); + String[] seqVarNames = seqDestAST.getText().split(",", -1); + int seqTypedIdx = 0; + boolean[] seqHasType = new boolean[seqVarNames.length]; + for (int dv = 0; dv < seqVarNames.length; dv++) { + String svn = seqVarNames[dv]; + seqHasType[dv] = svn.endsWith("+"); + if (seqHasType[dv]) svn = svn.substring(0, svn.length() - 1); + try { + seqClause.destructureVarNames.add( + QName.parse(staticContext, svn, null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(seqDestAST.getLine(), seqDestAST.getColumn(), + ErrorCodes.XPST0081, "No namespace defined for prefix " + svn); + } + seqClause.destructureVarTypes.add(null); + } + PathExpr seqInput = new PathExpr(context); + seqInput.setASTNode(exprFlowControl_AST_in); + } + ( + #( + DESTRUCTURE_VAR_TYPE + #( + "as" + { + SequenceType seqVarType = new SequenceType(); + while (seqTypedIdx < seqHasType.length && !seqHasType[seqTypedIdx]) seqTypedIdx++; + } + sequenceType [seqVarType] + { + if (seqTypedIdx < seqClause.destructureVarTypes.size()) { + seqClause.destructureVarTypes.set(seqTypedIdx, seqVarType); + } + seqTypedIdx++; + } + ) + ) + )* + ( + #( + "as" + { seqClause.sequenceType = new SequenceType(); } + sequenceType [seqClause.sequenceType] + ) + )? + step=expr [seqInput] + { + seqClause.inputSequence = seqInput; + clauses.add(seqClause); + } + ) + | + // XQ4: array destructuring + #( + arrDestAST:ARRAY_DESTRUCTURE + { + ForLetClause arrClause = new ForLetClause(); + arrClause.ast = arrDestAST; + arrClause.type = FLWORClause.ClauseType.LET_ARRAY_DESTRUCTURE; + arrClause.destructureVarNames = new ArrayList(); + arrClause.destructureVarTypes = new ArrayList(); + String[] arrVarNames = arrDestAST.getText().split(",", -1); + int arrTypedIdx = 0; + boolean[] arrHasType = new boolean[arrVarNames.length]; + for (int dv = 0; dv < arrVarNames.length; dv++) { + String avn = arrVarNames[dv]; + arrHasType[dv] = avn.endsWith("+"); + if (arrHasType[dv]) avn = avn.substring(0, avn.length() - 1); + try { + arrClause.destructureVarNames.add( + QName.parse(staticContext, avn, null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(arrDestAST.getLine(), arrDestAST.getColumn(), + ErrorCodes.XPST0081, "No namespace defined for prefix " + avn); + } + arrClause.destructureVarTypes.add(null); + } + PathExpr arrInput = new PathExpr(context); + arrInput.setASTNode(exprFlowControl_AST_in); + } + ( + #( + DESTRUCTURE_VAR_TYPE + #( + "as" + { + SequenceType arrVarType = new SequenceType(); + while (arrTypedIdx < arrHasType.length && !arrHasType[arrTypedIdx]) arrTypedIdx++; + } + sequenceType [arrVarType] + { + if (arrTypedIdx < arrClause.destructureVarTypes.size()) { + arrClause.destructureVarTypes.set(arrTypedIdx, arrVarType); + } + arrTypedIdx++; + } + ) + ) + )* ( #( "as" - { clause.sequenceType= new SequenceType(); } - sequenceType [clause.sequenceType] + { arrClause.sequenceType = new SequenceType(); } + sequenceType [arrClause.sequenceType] ) )? - step=expr [inputSequence] + step=expr [arrInput] { - try { - clause.varName = QName.parse(staticContext, letVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(letVarName.getLine(), letVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + letVarName.getText()); + arrClause.inputSequence = arrInput; + clauses.add(arrClause); + } + ) + | + // XQ4: map destructuring + #( + mapDestAST:MAP_DESTRUCTURE + { + ForLetClause mapClause = new ForLetClause(); + mapClause.ast = mapDestAST; + mapClause.type = FLWORClause.ClauseType.LET_MAP_DESTRUCTURE; + mapClause.destructureVarNames = new ArrayList(); + mapClause.destructureVarTypes = new ArrayList(); + String[] mapVarNames = mapDestAST.getText().split(",", -1); + int mapTypedIdx = 0; + boolean[] mapHasType = new boolean[mapVarNames.length]; + for (int dv = 0; dv < mapVarNames.length; dv++) { + String mvn = mapVarNames[dv]; + mapHasType[dv] = mvn.endsWith("+"); + if (mapHasType[dv]) mvn = mvn.substring(0, mvn.length() - 1); + try { + mapClause.destructureVarNames.add( + QName.parse(staticContext, mvn, null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(mapDestAST.getLine(), mapDestAST.getColumn(), + ErrorCodes.XPST0081, "No namespace defined for prefix " + mvn); + } + mapClause.destructureVarTypes.add(null); } - clause.inputSequence= inputSequence; - clauses.add(clause); + PathExpr mapInput = new PathExpr(context); + mapInput.setASTNode(exprFlowControl_AST_in); + } + ( + #( + DESTRUCTURE_VAR_TYPE + #( + "as" + { + SequenceType mapVarType = new SequenceType(); + while (mapTypedIdx < mapHasType.length && !mapHasType[mapTypedIdx]) mapTypedIdx++; + } + sequenceType [mapVarType] + { + if (mapTypedIdx < mapClause.destructureVarTypes.size()) { + mapClause.destructureVarTypes.set(mapTypedIdx, mapVarType); + } + mapTypedIdx++; + } + ) + ) + )* + ( + #( + "as" + { mapClause.sequenceType = new SequenceType(); } + sequenceType [mapClause.sequenceType] + ) + )? + step=expr [mapInput] + { + mapClause.inputSequence = mapInput; + clauses.add(mapClause); } ) )+ @@ -1884,7 +2761,7 @@ throws PermissionDeniedException, EXistException, XPathException ( { groupSpecExpr = new PathExpr(context); - groupSpecExpr.setASTNode(expr_AST_in); + groupSpecExpr.setASTNode(exprFlowControl_AST_in); } step=expr [groupSpecExpr] ) @@ -1915,7 +2792,7 @@ throws PermissionDeniedException, EXistException, XPathException ( { PathExpr orderSpecExpr= new PathExpr(context); - orderSpecExpr.setASTNode(expr_AST_in); + orderSpecExpr.setASTNode(exprFlowControl_AST_in); } step=expr [orderSpecExpr] { @@ -1981,7 +2858,7 @@ throws PermissionDeniedException, EXistException, XPathException w:"where" { whereExpr= new PathExpr(context); - whereExpr.setASTNode(expr_AST_in); + whereExpr.setASTNode(exprFlowControl_AST_in); } step=expr [whereExpr] { @@ -1994,422 +2871,176 @@ throws PermissionDeniedException, EXistException, XPathException ) | #( - co:"count" - countVarName:VARIABLE_BINDING + wh:"while" + { + PathExpr whileExpr = new PathExpr(context); + whileExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [whileExpr] { ForLetClause clause = new ForLetClause(); - clause.ast = co; - try { - clause.varName = QName.parse(staticContext, countVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(countVarName.getLine(), countVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + countVarName.getText()); - } - clause.type = FLWORClause.ClauseType.COUNT; - clause.inputSequence = null; + clause.ast = wh; + clause.type = FLWORClause.ClauseType.WHILE; + clause.inputSequence = whileExpr; clauses.add(clause); } ) - )+ - step=expr [(PathExpr) action] - { - for (int i= clauses.size() - 1; i >= 0; i--) { - ForLetClause clause= (ForLetClause) clauses.get(i); - FLWORClause expr; - switch (clause.type) { - case LET: - expr = new LetExpr(context); - expr.setASTNode(expr_AST_in); - break; - case GROUPBY: - expr = new GroupByClause(context); - break; - case ORDERBY: - expr = new OrderByClause(context, clause.orderSpecs); - break; - case WHERE: - expr = new WhereClause(context, new DebuggableExpression(clause.inputSequence)); - break; - case COUNT: - expr = new CountClause(context, clause.varName); - break; - case WINDOW: - expr = new WindowExpr(context, clause.windowType, clause.windowConditions.get(0), clause.windowConditions.size() > 1 ? clause.windowConditions.get(1) : null); - break; - default: - expr = new ForExpr(context, clause.allowEmpty); - break; - } - expr.setASTNode(clause.ast); - if (clause.type == FLWORClause.ClauseType.FOR || clause.type == FLWORClause.ClauseType.LET - || clause.type == FLWORClause.ClauseType.WINDOW) { - final BindingExpression bind = (BindingExpression)expr; - bind.setVariable(clause.varName); - bind.setSequenceType(clause.sequenceType); - bind.setInputSequence(clause.inputSequence); - if (clause.type == FLWORClause.ClauseType.FOR) { - ((ForExpr) bind).setPositionalVariable(clause.posVar); - } - } else if (clause.type == FLWORClause.ClauseType.GROUPBY) { - if (clause.groupSpecs != null) { - GroupSpec specs[] = new GroupSpec[clause.groupSpecs.size()]; - int k = 0; - for (GroupSpec groupSpec : clause.groupSpecs) { - specs[k++]= groupSpec; - } - ((GroupByClause)expr).setGroupSpecs(specs); - } - } - if (!(action instanceof FLWORClause)) - expr.setReturnExpression(new DebuggableExpression(action)); - else { - expr.setReturnExpression(action); - ((FLWORClause)action).setPreviousClause(expr); - } - - action= expr; - } - - path.add(action); - step = action; - } - ) - | - // instance of: - #( - "instance" - { - PathExpr expr = new PathExpr(context); - expr.setASTNode(expr_AST_in); - SequenceType type= new SequenceType(); - } - step=expr [expr] - sequenceType [type] - { - step = new InstanceOfExpression(context, expr, type); - step.setASTNode(expr_AST_in); - path.add(step); - } - ) - | - // treat as: - #( - "treat" - { - PathExpr expr = new PathExpr(context); - expr.setASTNode(expr_AST_in); - SequenceType type= new SequenceType(); - } - step=expr [expr] - sequenceType [type] - { - step = new TreatAsExpression(context, expr, type); - step.setASTNode(expr_AST_in); - path.add(step); - } - ) - | - // switch - #( - switchAST:"switch" - { - PathExpr operand = new PathExpr(context); - operand.setASTNode(expr_AST_in); - } - step=expr [operand] - { - SwitchExpression switchExpr = new SwitchExpression(context, operand); - switchExpr.setASTNode(switchAST); - path.add(switchExpr); - } - ( - { - List caseOperands = new ArrayList(2); - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - } - (( - { - PathExpr caseOperand = new PathExpr(context); - caseOperand.setASTNode(expr_AST_in); - } - "case" - expr [caseOperand] - { caseOperands.add(caseOperand); } - )+ - #( - "return" - step= expr [returnExpr] - { switchExpr.addCase(caseOperands, returnExpr); } - )) - )+ - ( - "default" - { - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - } - step=expr [returnExpr] - { - switchExpr.setDefault(returnExpr); - } - ) - { step = switchExpr; } - ) - | - // typeswitch - #( - "typeswitch" - { - PathExpr operand = new PathExpr(context); - operand.setASTNode(expr_AST_in); - } - step=expr [operand] - { - TypeswitchExpression tswitch = new TypeswitchExpression(context, operand); - tswitch.setASTNode(expr_AST_in); - path.add(tswitch); - } - ( - { - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - QName qn = null; - List types = new ArrayList(2); - SequenceType type = new SequenceType(); - } - #( - "case" - ( - var:VARIABLE_BINDING - { - try { - qn = QName.parse(staticContext, var.getText()); - } catch (final IllegalQNameException iqe) { - throw new XPathException(var.getLine(), var.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + var.getText()); - } - } - )? - ( - sequenceType[type] - { - types.add(type); - type = new SequenceType(); - } - )+ - // Need return as root in following to disambiguate - // e.g. ( case a xs:integer ( * 3 3 ) ) - // which gives xs:integer* and no operator left for 3 3 ... - // Now ( case a xs:integer ( return ( + 3 3 ) ) ) /ljo - #( - "return" - step= expr [returnExpr] - { - SequenceType[] atype = new SequenceType[types.size()]; - atype = types.toArray(atype); - tswitch.addCase(atype, qn, returnExpr); - } - ) - ) - - )+ - ( - "default" - { - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - QName qn = null; - } - ( - dvar:VARIABLE_BINDING + | + #( + co:"count" + countVarName:VARIABLE_BINDING { + ForLetClause clause = new ForLetClause(); + clause.ast = co; try { - qn = QName.parse(staticContext, dvar.getText()); + clause.varName = QName.parse(staticContext, countVarName.getText(), null); } catch (final IllegalQNameException iqe) { - throw new XPathException(dvar.getLine(), dvar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + dvar.getText()); + throw new XPathException(countVarName.getLine(), countVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + countVarName.getText()); } + clause.type = FLWORClause.ClauseType.COUNT; + clause.inputSequence = null; + clauses.add(clause); } - )? - step=expr [returnExpr] - { - tswitch.setDefault(qn, returnExpr); + ) + )+ + step=expr [(PathExpr) action] + { + for (int i= clauses.size() - 1; i >= 0; i--) { + ForLetClause clause= (ForLetClause) clauses.get(i); + FLWORClause expr; + switch (clause.type) { + case LET: + expr = new LetExpr(context); + expr.setASTNode(exprFlowControl_AST_in); + break; + case GROUPBY: + expr = new GroupByClause(context); + break; + case ORDERBY: + expr = new OrderByClause(context, clause.orderSpecs); + break; + case WHERE: + expr = new WhereClause(context, new DebuggableExpression(clause.inputSequence)); + break; + case WHILE: + expr = new WhileClause(context, new DebuggableExpression(clause.inputSequence)); + break; + case COUNT: + expr = new CountClause(context, clause.varName); + break; + case WINDOW: + expr = new WindowExpr(context, clause.windowType, clause.windowConditions.get(0), clause.windowConditions.size() > 1 ? clause.windowConditions.get(1) : null); + break; + case FOR_MEMBER: + expr = new ForMemberExpr(context); + break; + case FOR_KEY: + expr = new ForKeyValueExpr(context, FLWORClause.ClauseType.FOR_KEY); + break; + case FOR_VALUE: + expr = new ForKeyValueExpr(context, FLWORClause.ClauseType.FOR_VALUE); + break; + case FOR_KEY_VALUE: + expr = new ForKeyValueExpr(context, FLWORClause.ClauseType.FOR_KEY_VALUE); + break; + case LET_SEQ_DESTRUCTURE: + case LET_ARRAY_DESTRUCTURE: + case LET_MAP_DESTRUCTURE: + { + LetDestructureExpr.DestructureMode dmode; + if (clause.type == FLWORClause.ClauseType.LET_SEQ_DESTRUCTURE) { + dmode = LetDestructureExpr.DestructureMode.SEQUENCE; + } else if (clause.type == FLWORClause.ClauseType.LET_ARRAY_DESTRUCTURE) { + dmode = LetDestructureExpr.DestructureMode.ARRAY; + } else { + dmode = LetDestructureExpr.DestructureMode.MAP; + } + LetDestructureExpr dexpr = new LetDestructureExpr(context, dmode); + dexpr.setASTNode(clause.ast); + for (int j = 0; j < clause.destructureVarNames.size(); j++) { + dexpr.addVariable( + (QName) clause.destructureVarNames.get(j), + clause.destructureVarTypes.size() > j ? + (SequenceType) clause.destructureVarTypes.get(j) : null); + } + dexpr.setInputSequence(clause.inputSequence); + if (clause.sequenceType != null) { + dexpr.setOverallType(clause.sequenceType); + } + expr = dexpr; + break; + } + default: + expr = new ForExpr(context, clause.allowEmpty); + break; + } + expr.setASTNode(clause.ast); + if (clause.type == FLWORClause.ClauseType.FOR || clause.type == FLWORClause.ClauseType.LET + || clause.type == FLWORClause.ClauseType.WINDOW + || clause.type == FLWORClause.ClauseType.FOR_MEMBER + || clause.type == FLWORClause.ClauseType.FOR_KEY + || clause.type == FLWORClause.ClauseType.FOR_VALUE + || clause.type == FLWORClause.ClauseType.FOR_KEY_VALUE) { + final BindingExpression bind = (BindingExpression)expr; + bind.setVariable(clause.varName); + bind.setSequenceType(clause.sequenceType); + bind.setInputSequence(clause.inputSequence); + if (clause.type == FLWORClause.ClauseType.FOR) { + ((ForExpr) bind).setPositionalVariable(clause.posVar); + } else if (clause.type == FLWORClause.ClauseType.FOR_MEMBER) { + ((ForMemberExpr) bind).setPositionalVariable(clause.posVar); + } else if (clause.type == FLWORClause.ClauseType.FOR_KEY + || clause.type == FLWORClause.ClauseType.FOR_VALUE + || clause.type == FLWORClause.ClauseType.FOR_KEY_VALUE) { + ((ForKeyValueExpr) bind).setPositionalVariable(clause.posVar); + if (clause.valueVarName != null) { + ((ForKeyValueExpr) bind).setValueVariable(clause.valueVarName); + if (clause.valueSequenceType != null) { + ((ForKeyValueExpr) bind).setValueSequenceType(clause.valueSequenceType); + } + } + } + } else if (clause.type == FLWORClause.ClauseType.GROUPBY) { + if (clause.groupSpecs != null) { + GroupSpec specs[] = new GroupSpec[clause.groupSpecs.size()]; + int k = 0; + for (GroupSpec groupSpec : clause.groupSpecs) { + specs[k++]= groupSpec; + } + ((GroupByClause)expr).setGroupSpecs(specs); } - ) - { step = tswitch; } - ) - | - // logical operator: or - #( - "or" - { - PathExpr left= new PathExpr(context); - left.setASTNode(expr_AST_in); - } - step=expr [left] - { - PathExpr right= new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [right] - ) - { - OpOr or= new OpOr(context); - or.addPath(left); - or.addPath(right); - path.addPath(or); - step = or; - } - | - // logical operator: and - #( - "and" - { - PathExpr left= new PathExpr(context); - left.setASTNode(expr_AST_in); - - PathExpr right= new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [left] - step=expr [right] - ) - { - OpAnd and= new OpAnd(context); - and.addPath(left); - and.addPath(right); - path.addPath(and); - step = and; - } - | - // union expressions: | and union - #( - UNION - { - PathExpr left= new PathExpr(context); - left.setASTNode(expr_AST_in); - - PathExpr right= new PathExpr(context); - right.setASTNode(expr_AST_in); + } + if (!(action instanceof FLWORClause)) + expr.setReturnExpression(new DebuggableExpression(action)); + else { + expr.setReturnExpression(action); + ((FLWORClause)action).setPreviousClause(expr); } - step=expr [left] - step=expr [right] - ) - { - Union union= new Union(context, left, right); - path.add(union); - step = union; - } - | - // intersections: - #( "intersect" - { - PathExpr left = new PathExpr(context); - left.setASTNode(expr_AST_in); - PathExpr right = new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [left] - step=expr [right] - ) - { - Intersect intersect = new Intersect(context, left, right); - path.add(intersect); - step = intersect; - } - | - #( "except" - { - PathExpr left = new PathExpr(context); - left.setASTNode(expr_AST_in); + action= expr; + } - PathExpr right = new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [left] - step=expr [right] - ) - { - Except intersect = new Except(context, left, right); - path.add(intersect); - step = intersect; - } - | - // absolute path expression starting with a / - #( - ABSOLUTE_SLASH - { - RootNode root= new RootNode(context); - path.add(root); - } - ( step=expr [path] )? - ) - | - // absolute path expression starting with // - #( - ABSOLUTE_DSLASH - { - RootNode root= new RootNode(context); - path.add(root); + path.add(action); + step = action; } - ( - step=expr [path] - { - if (step instanceof LocationStep) { - LocationStep s= (LocationStep) step; - if (s.getAxis() == Constants.ATTRIBUTE_AXIS || - (s.getTest().getType() == Type.ATTRIBUTE && s.getAxis() == Constants.CHILD_AXIS)) - // combines descendant-or-self::node()/attribute:* - s.setAxis(Constants.DESCENDANT_ATTRIBUTE_AXIS); - else { - s.setAxis(Constants.DESCENDANT_SELF_AXIS); - s.setAbbreviated(true); - } - } else - step.setPrimaryAxis(Constants.DESCENDANT_SELF_AXIS); - } - )? ) | - // range expression: to + // instance of: #( - "to" - { - PathExpr start= new PathExpr(context); - start.setASTNode(expr_AST_in); - - PathExpr end= new PathExpr(context); - end.setASTNode(expr_AST_in); - - List args= new ArrayList(2); - args.add(start); - args.add(end); - } - step=expr [start] - step=expr [end] + "instance" { - RangeExpression range= new RangeExpression(context); - range.setASTNode(expr_AST_in); - range.setArguments(args); - path.addPath(range); - step = range; + PathExpr expr = new PathExpr(context); + expr.setASTNode(exprFlowControl_AST_in); + SequenceType type= new SequenceType(); } - ) - | - step=generalComp [path] - | - step=valueComp [path] - | - step=nodeComp [path] - | - step=primaryExpr [path] - | - step=pathExpr [path] - | - step=extensionExpr [path] - | - step=numericExpr [path] - | - step=updateExpr [path] + step=expr [expr] + sequenceType [type] + { + step = new InstanceOfExpression(context, expr, type); + step.setASTNode(exprFlowControl_AST_in); + path.add(step); + } + ) ; /** @@ -2495,14 +3126,63 @@ throws PermissionDeniedException, EXistException, XPathException step=postfixExpr [step] { path.add(step); } | + ql:QNAME_LITERAL + { + final String qlText = ql.getText(); + final QName qlQName; + try { + qlQName = QName.parse(staticContext, qlText); + } catch (final IllegalQNameException iqe) { + throw new XPathException(ql.getLine(), ql.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + qlText); + } + step = new LiteralValue(context, new QNameValue(context, qlQName)); + step.setASTNode(ql); + } + step=postfixExpr [step] + { path.add(step); } + | step=inlineFunctionDecl [path] step=postfixExpr [step] { path.add(step); } | + step=focusFunctionDecl [path] + step=postfixExpr [step] + { path.add(step); } + | step = lookup [null] step=postfixExpr [step] { path.add(step); } | + #( + stAST:STRING_TEMPLATE + { + StringConstructor st = new StringConstructor(context); + st.setASTNode(stAST); + } + ( + stContent:STRING_TEMPLATE_CONTENT + { + // Unescape {{ -> {, }} -> }, `` -> ` + String raw = stContent.getText(); + raw = raw.replace("{{", "{").replace("}}", "}").replace("``", "`"); + st.addContent(raw); + } + | + { + PathExpr stInterpolation = new PathExpr(context); + stInterpolation.setASTNode(primaryExpr_AST_in); + } + expr[stInterpolation] + { + st.addInterpolation(stInterpolation.simplify()); + } + )* + { + path.add(st); + step = st; + } + ) + | #( scAST:STRING_CONSTRUCTOR_START { @@ -3024,21 +3704,30 @@ throws XPathException | i:INTEGER_LITERAL { - step= new LiteralValue(context, new IntegerValue(i.getText())); + String itext = i.getText().replace("_", ""); + java.math.BigInteger intVal; + if (itext.startsWith("0x") || itext.startsWith("0X")) { + intVal = new java.math.BigInteger(itext.substring(2), 16); + } else if (itext.startsWith("0b") || itext.startsWith("0B")) { + intVal = new java.math.BigInteger(itext.substring(2), 2); + } else { + intVal = new java.math.BigInteger(itext); + } + step= new LiteralValue(context, new IntegerValue(intVal)); step.setASTNode(i); } | ( dec:DECIMAL_LITERAL { - step= new LiteralValue(context, new DecimalValue(dec.getText())); + step= new LiteralValue(context, new DecimalValue(dec.getText().replace("_", ""))); step.setASTNode(dec); } | dbl:DOUBLE_LITERAL { step= new LiteralValue(context, - new DoubleValue(Double.parseDouble(dbl.getText()))); + new DoubleValue(Double.parseDouble(dbl.getText().replace("_", "")))); step.setASTNode(dbl); } ) @@ -3137,6 +3826,19 @@ throws PermissionDeniedException, EXistException, XPathException ( step = lookup [step] | + #( + fam:FILTER_AM + { + PathExpr filterPred = new PathExpr(context); + filterPred.setASTNode(postfixExpr_AST_in); + } + expr [filterPred] + { + step = new FilterExprAM(context, step, filterPred.simplify()); + step.setASTNode(fam); + } + ) + | #( PREDICATE { @@ -3212,6 +3914,55 @@ throws PermissionDeniedException, EXistException, XPathException ( pos:INTEGER_VALUE { position = Integer.parseInt(pos.getText()); } | + // XQ4: string literal as key selector (?"first value") + strKey:STRING_LITERAL + { + lookupExpr.add(new LiteralValue(context, new StringValue(strKey.getText()))); + } + | + // XQ4: decimal literal as key selector (?1.2) + decKey:DECIMAL_LITERAL + { + lookupExpr.add(new LiteralValue(context, new DecimalValue(decKey.getText().replace("_", "")))); + } + | + // XQ4: double literal as key selector (?1.2e0) + dblKey:DOUBLE_LITERAL + { + lookupExpr.add(new LiteralValue(context, new DoubleValue(Double.parseDouble(dblKey.getText().replace("_", ""))))); + } + | + // XQ4: variable reference as key selector (?$var) + varKey:VARIABLE_REF + { + final QName varQn; + try { + varQn = QName.parse(staticContext, varKey.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(varKey.getLine(), varKey.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + varKey.getText()); + } + lookupExpr.add(new VariableReference(context, varQn)); + } + | + // XQ4: context item as key selector (?.) + ctxKey:SELF + { + lookupExpr.add(new ContextItemExpression(context)); + } + | + // XQ4: QName literal as key selector (?#name) + qnKey:QNAME_LITERAL + { + final String qnText = qnKey.getText(); + final QName qnQName; + try { + qnQName = QName.parse(staticContext, qnText); + } catch (final IllegalQNameException iqe) { + throw new XPathException(qnKey.getLine(), qnKey.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + qnText); + } + lookupExpr.add(new LiteralValue(context, new QNameValue(context, qnQName))); + } + | ( expr [lookupExpr] )+ )? { @@ -3254,6 +4005,27 @@ throws PermissionDeniedException, EXistException, XPathException isPartial = true; } | + #( + kw:KEYWORD_ARG + ( + QUESTION { + // Keyword argument with placeholder value: name := ? + params.add(new KeywordArgumentExpression(context, kw.getText(), + new Function.Placeholder(context))); + isPartial = true; + } + | + { + PathExpr kwExpr = new PathExpr(context); + kwExpr.setASTNode(functionCall_AST_in); + } + expr [kwExpr] + { + params.add(new KeywordArgumentExpression(context, kw.getText(), kwExpr)); + } + ) + ) + | expr [pathExpr] { params.add(pathExpr); } ) )* @@ -3288,7 +4060,7 @@ throws PermissionDeniedException, EXistException, XPathException } catch (final IllegalQNameException iqe) { throw new XPathException(name.getLine(), name.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + name.getText()); } - NamedFunctionReference ref = new NamedFunctionReference(context, qname, Integer.parseInt(arity.getText())); + NamedFunctionReference ref = new NamedFunctionReference(context, qname, Integer.parseInt(arity.getText().replace("_", ""))); step = ref; } ) @@ -3321,6 +4093,14 @@ throws PermissionDeniedException, EXistException "ancestor" { axis= Constants.ANCESTOR_AXIS; } | "ancestor-or-self" { axis= Constants.ANCESTOR_SELF_AXIS; } + | + "following-or-self" { axis= Constants.FOLLOWING_OR_SELF_AXIS; } + | + "preceding-or-self" { axis= Constants.PRECEDING_OR_SELF_AXIS; } + | + "following-sibling-or-self" { axis= Constants.FOLLOWING_SIBLING_OR_SELF_AXIS; } + | + "preceding-sibling-or-self" { axis= Constants.PRECEDING_SIBLING_OR_SELF_AXIS; } ; valueComp [PathExpr path] @@ -3818,6 +4598,140 @@ throws PermissionDeniedException, EXistException, XPathException ) ; +mappingArrowOp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step= null; +}: + #( + mapArrowAST:MAPPING_ARROW_OP + { + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(mappingArrowOp_AST_in); + } + expr [leftExpr] + { + MappingArrowOperator op = new MappingArrowOperator(context, leftExpr.simplify()); + op.setASTNode(mapArrowAST); + path.add(op); + step = op; + + PathExpr nameExpr = new PathExpr(context); + nameExpr.setASTNode(mappingArrowOp_AST_in); + String name = null; + } + ( + eq:EQNAME + { name = eq.toString(); } + | + expr [nameExpr] + ) + { List params = new ArrayList(5); } + ( + { + PathExpr pathExpr = new PathExpr(context); + pathExpr.setASTNode(mappingArrowOp_AST_in); + } + expr [pathExpr] { params.add(pathExpr.simplify()); } + )* + { + if (name == null) { + op.setArrowFunction(nameExpr, params); + } else { + op.setArrowFunction(name, params); + } + } + ) + ; + +pipelineOp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +}: + #( + pipeAST:PIPELINE_OP + { + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(pipelineOp_AST_in); + } + expr [leftExpr] + { + PathExpr rightExpr = new PathExpr(context); + rightExpr.setASTNode(pipelineOp_AST_in); + } + expr [rightExpr] + { + step = new PipelineExpression(context, leftExpr.simplify(), rightExpr.simplify()); + step.setASTNode(pipeAST); + path.add(step); + } + ) + ; + +methodCallOp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +}: + #( + mcAST:METHOD_CALL_OP + { + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(methodCallOp_AST_in); + } + expr [leftExpr] + mn:NCNAME + { + MethodCallOperator op = new MethodCallOperator(context, leftExpr.simplify()); + op.setASTNode(mcAST); + path.add(op); + step = op; + + List params = new ArrayList(5); + } + ( + { + PathExpr pathExpr = new PathExpr(context); + pathExpr.setASTNode(methodCallOp_AST_in); + } + expr [pathExpr] { params.add(pathExpr.simplify()); } + )* + { + op.setMethod(mn.getText(), params); + } + ) + ; + +otherwiseExpr [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +}: + #( + owAST:LITERAL_otherwise + { + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(otherwiseExpr_AST_in); + } + expr [leftExpr] + { + PathExpr rightExpr = new PathExpr(context); + rightExpr.setASTNode(otherwiseExpr_AST_in); + } + expr [rightExpr] + { + step = new OtherwiseExpression(context, leftExpr.simplify(), rightExpr.simplify()); + step.setASTNode(owAST); + path.add(step); + } + ) + ; + typeCastExpr [PathExpr path] returns [Expression step] throws PermissionDeniedException, EXistException, XPathException @@ -3832,25 +4746,72 @@ throws PermissionDeniedException, EXistException, XPathException Cardinality cardinality= Cardinality.EXACTLY_ONE; } step=expr [expr] - t:ATOMIC_TYPE ( - QUESTION - { cardinality= Cardinality.ZERO_OR_ONE; } - )? - { - try { - QName qn= QName.parse(staticContext, t.getText()); - int code= Type.getType(qn); - CastExpression castExpr= new CastExpression(context, expr, code, cardinality); + #( + CHOICE_TYPE + { + List choiceTypes = new ArrayList(); + } + ( + ct:ATOMIC_TYPE + { + try { + QName qn = QName.parse(staticContext, ct.getText()); + choiceTypes.add(Type.getType(qn)); + } catch (final XPathException e) { + throw new XPathException(ct.getLine(), ct.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + ct.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(ct.getLine(), ct.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + ct.getText()); + } + } + )+ + ) + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + int[] types = new int[choiceTypes.size()]; + for (int ci = 0; ci < choiceTypes.size(); ci++) { types[ci] = choiceTypes.get(ci); } + ChoiceCastExpression castExpr = new ChoiceCastExpression(context, expr, types, cardinality); castExpr.setASTNode(castAST); path.add(castExpr); step = castExpr; - } catch (final XPathException e) { - throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t.getText()); - } catch (final IllegalQNameException e) { - throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t.getText()); } - } + | + t:ATOMIC_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + try { + QName qn= QName.parse(staticContext, t.getText()); + int code= Type.getType(qn); + CastExpression castExpr= new CastExpression(context, expr, code, cardinality); + castExpr.setASTNode(castAST); + path.add(castExpr); + step = castExpr; + } catch (final XPathException e) { + throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t.getText()); + } + } + | + enumCast:ENUM_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + String[] enumVals = enumCast.getText().split(",", -1); + EnumCastExpression enumCastExpr = new EnumCastExpression(context, expr, enumVals, cardinality, false); + enumCastExpr.setASTNode(castAST); + path.add(enumCastExpr); + step = enumCastExpr; + } + ) ) | #( @@ -3861,25 +4822,72 @@ throws PermissionDeniedException, EXistException, XPathException Cardinality cardinality= Cardinality.EXACTLY_ONE; } step=expr [expr] - t2:ATOMIC_TYPE ( - QUESTION - { cardinality= Cardinality.ZERO_OR_ONE; } - )? - { - try { - QName qn= QName.parse(staticContext, t2.getText()); - int code= Type.getType(qn); - CastableExpression castExpr= new CastableExpression(context, expr, code, cardinality); - castExpr.setASTNode(castAST); + #( + CHOICE_TYPE + { + List choiceTypes2 = new ArrayList(); + } + ( + ct2:ATOMIC_TYPE + { + try { + QName qn = QName.parse(staticContext, ct2.getText()); + choiceTypes2.add(Type.getType(qn)); + } catch (final XPathException e) { + throw new XPathException(ct2.getLine(), ct2.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + ct2.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(ct2.getLine(), ct2.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + ct2.getText()); + } + } + )+ + ) + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + int[] types2 = new int[choiceTypes2.size()]; + for (int ci = 0; ci < choiceTypes2.size(); ci++) { types2[ci] = choiceTypes2.get(ci); } + ChoiceCastableExpression castExpr = new ChoiceCastableExpression(context, expr, types2, cardinality); + castExpr.setASTNode(castableAST); path.add(castExpr); step = castExpr; - } catch (final XPathException e) { - throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t2.getText()); - } catch (final IllegalQNameException e) { - throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t2.getText()); } - } + | + t2:ATOMIC_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + try { + QName qn= QName.parse(staticContext, t2.getText()); + int code= Type.getType(qn); + CastableExpression castExpr= new CastableExpression(context, expr, code, cardinality); + castExpr.setASTNode(castableAST); + path.add(castExpr); + step = castExpr; + } catch (final XPathException e) { + throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t2.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t2.getText()); + } + } + | + enumCastable:ENUM_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + String[] enumVals2 = enumCastable.getText().split(",", -1); + EnumCastExpression enumCastExpr2 = new EnumCastExpression(context, expr, enumVals2, cardinality, true); + enumCastExpr2.setASTNode(castableAST); + path.add(enumCastExpr2); + step = enumCastExpr2; + } + ) ) ; diff --git a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java index ca85a06f5fe..f2dbb185acb 100644 --- a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java +++ b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java @@ -28,6 +28,18 @@ public class EXistOutputKeys { */ public static final String ITEM_SEPARATOR = "item-separator"; + // --- QT4 Serialization 4.0 parameters --- + public static final String CANONICAL = "canonical"; + public static final String ESCAPE_SOLIDUS = "escape-solidus"; + public static final String JSON_LINES = "json-lines"; + + // --- CSV serialization parameters --- + public static final String CSV_FIELD_DELIMITER = "csv.field-delimiter"; + public static final String CSV_ROW_DELIMITER = "csv.row-delimiter"; + public static final String CSV_QUOTE_CHARACTER = "csv.quote-character"; + public static final String CSV_HEADER = "csv.header"; + public static final String CSV_QUOTES = "csv.quotes"; + public static final String OMIT_ORIGINAL_XML_DECLARATION = "omit-original-xml-declaration"; public static final String OUTPUT_DOCTYPE = "output-doctype"; diff --git a/exist-core/src/main/java/org/exist/util/Collations.java b/exist-core/src/main/java/org/exist/util/Collations.java index 2d03138a291..af3ca1683f5 100644 --- a/exist-core/src/main/java/org/exist/util/Collations.java +++ b/exist-core/src/main/java/org/exist/util/Collations.java @@ -75,6 +75,11 @@ public class Collations { */ public final static String HTML_ASCII_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/html-ascii-case-insensitive"; + /** + * The Unicode Case-Insensitive Collation as defined by XPath F&O 4.0. + */ + public final static String UNICODE_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/unicode-case-insensitive"; + /** * The XQTS ASCII Case-blind Collation as defined by the XQTS 3.1. */ @@ -90,6 +95,11 @@ public class Collations { */ private final static AtomicReference htmlAsciiCaseInsensitiveCollator = new AtomicReference<>(); + /** + * Lazy-initialized singleton Unicode Case Insensitive Collator + */ + private final static AtomicReference unicodeCaseInsensitiveCollator = new AtomicReference<>(); + /** * Lazy-initialized singleton XQTS Case Blind Collator */ @@ -276,6 +286,12 @@ public class Collations { } catch (final Exception e) { throw new XPathException(expression, "Unable to instantiate HTML ASCII Case Insensitive Collator: " + e.getMessage(), e); } + } else if(UNICODE_CASE_INSENSITIVE_COLLATION_URI.equals(uri)) { + try { + collator = getUnicodeCaseInsensitiveCollator(); + } catch (final Exception e) { + throw new XPathException(expression, "Unable to instantiate Unicode Case Insensitive Collator: " + e.getMessage(), e); + } } else if(XQTS_ASCII_CASE_BLIND_COLLATION_URI.equals(uri)) { try { collator = getXqtsAsciiCaseBlindCollator(); @@ -346,7 +362,24 @@ public static boolean equals(@Nullable final Collator collator, final String s1, */ public static int compare(@Nullable final Collator collator, final String s1,final String s2) { if (collator == null) { - return s1 == null ? (s2 == null ? 0 : -1) : s1.compareTo(s2); + if (s1 == null) { + return s2 == null ? 0 : -1; + } + // Compare by Unicode codepoints, not UTF-16 code units. + // String.compareTo() compares char (UTF-16) values, which gives wrong + // ordering for supplementary characters (U+10000+) encoded as surrogate pairs. + int i1 = 0, i2 = 0; + while (i1 < s1.length() && i2 < s2.length()) { + final int cp1 = s1.codePointAt(i1); + final int cp2 = s2.codePointAt(i2); + if (cp1 != cp2) { + return cp1 - cp2; + } + i1 += Character.charCount(cp1); + i2 += Character.charCount(cp2); + } + // Shorter string is less; equal length means equal + return (s1.length() - i1) - (s2.length() - i2); } else { return collator.compare(s1, s2); } @@ -371,10 +404,16 @@ public static boolean startsWith(@Nullable final Collator collator, final String return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first() == 0; + } else { + // Fallback for non-RuleBasedCollator (e.g., HtmlAsciiCaseInsensitiveCollator) + if (s1.length() >= s2.length()) { + return collator.compare(s1.substring(0, s2.length()), s2) == 0; + } + return false; } } } @@ -398,9 +437,9 @@ public static boolean endsWith(@Nullable final Collator collator, final String s return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); int lastPos = SearchIterator.DONE; int lastLen = 0; for (int pos = searchIterator.first(); pos != SearchIterator.DONE; @@ -410,6 +449,12 @@ public static boolean endsWith(@Nullable final Collator collator, final String s } return lastPos > SearchIterator.DONE && lastPos + lastLen == s1.length(); + } else { + // Fallback for non-RuleBasedCollator + if (s1.length() >= s2.length()) { + return collator.compare(s1.substring(s1.length() - s2.length()), s2) == 0; + } + return false; } } } @@ -433,10 +478,18 @@ public static boolean contains(@Nullable final Collator collator, final String s return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first() >= 0; + } else { + // Fallback for non-RuleBasedCollator + for (int i = 0; i <= s1.length() - s2.length(); i++) { + if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) { + return true; + } + } + return false; } } } @@ -459,10 +512,18 @@ public static int indexOf(@Nullable final Collator collator, final String s1, fi return 0; } else if (s1.isEmpty()) { return -1; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first(); + } else { + // Fallback for non-RuleBasedCollator + for (int i = 0; i <= s1.length() - s2.length(); i++) { + if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) { + return i; + } + } + return -1; } } } @@ -809,21 +870,119 @@ private static Collator getSamiskCollator() throws Exception { return collator; } - private static Collator getHtmlAsciiCaseInsensitiveCollator() throws Exception { + private static Collator getHtmlAsciiCaseInsensitiveCollator() { Collator collator = htmlAsciiCaseInsensitiveCollator.get(); if (collator == null) { - collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H " - + "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T " - + "&u=U &v=V &w=W &x=X &y=Y &z=Z"); - collator.setStrength(Collator.PRIMARY); + // XQ4 html-ascii-case-insensitive: ASCII letters A-Z fold to a-z, + // all other characters compare by Unicode codepoint order. + // Cannot use RuleBasedCollator with PRIMARY strength because that + // makes ALL case/accent differences irrelevant, not just ASCII. htmlAsciiCaseInsensitiveCollator.compareAndSet(null, - collator.freeze()); + new HtmlAsciiCaseInsensitiveCollator()); collator = htmlAsciiCaseInsensitiveCollator.get(); } return collator; } + private static Collator getUnicodeCaseInsensitiveCollator() { + Collator collator = unicodeCaseInsensitiveCollator.get(); + if (collator == null) { + // Unicode case-insensitive: UCA with SECONDARY strength + // ignores case differences but respects accents and other distinctions + final Collator uca = Collator.getInstance(); + uca.setStrength(Collator.SECONDARY); + unicodeCaseInsensitiveCollator.compareAndSet(null, uca); + collator = unicodeCaseInsensitiveCollator.get(); + } + + return collator; + } + + /** + * Custom Collator for HTML ASCII case-insensitive comparison. + * Folds only ASCII letters A-Z to a-z, then compares by Unicode codepoint. + * Non-ASCII characters are compared by their codepoint value without folding. + */ + private static final class HtmlAsciiCaseInsensitiveCollator extends Collator { + + @Override + public int compare(final String source, final String target) { + int i1 = 0, i2 = 0; + while (i1 < source.length() && i2 < target.length()) { + int cp1 = source.codePointAt(i1); + int cp2 = target.codePointAt(i2); + // Fold ASCII uppercase to lowercase only + if (cp1 >= 'A' && cp1 <= 'Z') { + cp1 += 32; + } + if (cp2 >= 'A' && cp2 <= 'Z') { + cp2 += 32; + } + if (cp1 != cp2) { + return cp1 - cp2; + } + i1 += Character.charCount(cp1); + i2 += Character.charCount(cp2); + } + return (source.length() - i1) - (target.length() - i2); + } + + @Override + public CollationKey getCollationKey(final String source) { + throw new UnsupportedOperationException("CollationKey not supported for HTML ASCII case-insensitive collation"); + } + + @Override + public RawCollationKey getRawCollationKey(final String source, final RawCollationKey key) { + throw new UnsupportedOperationException("RawCollationKey not supported for HTML ASCII case-insensitive collation"); + } + + @Override + public int setVariableTop(final String varTop) { + return 0; + } + + @Override + public int getVariableTop() { + return 0; + } + + @Override + public void setVariableTop(final int varTop) { + } + + @Override + public VersionInfo getVersion() { + return VersionInfo.getInstance(1); + } + + @Override + public VersionInfo getUCAVersion() { + return VersionInfo.getInstance(1); + } + + @Override + public int hashCode() { + return HtmlAsciiCaseInsensitiveCollator.class.hashCode(); + } + + @Override + public Collator freeze() { + return this; + } + + @Override + public boolean isFrozen() { + return true; + } + + @Override + public Collator cloneAsThawed() { + return new HtmlAsciiCaseInsensitiveCollator(); + } + } + private static Collator getXqtsAsciiCaseBlindCollator() throws Exception { Collator collator = xqtsAsciiCaseBlindCollator.get(); if (collator == null) { diff --git a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java index 758ccee130a..a1b7c9890b3 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java @@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() { public void setOutput(Writer writer, Properties properties) { outputProperties = Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties)); final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml"); - final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0"); - + // For html/xhtml methods, determine HTML version: + // 1. Use html-version if explicitly set + // 2. Otherwise use version (W3C spec: version controls HTML version for html method) + // 3. Default to 5.0 double htmlVersion; - try { - htmlVersion = Double.parseDouble(htmlVersionProp); - } catch (NumberFormatException e) { - htmlVersion = 1.0; + final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION); + if (explicitHtmlVersion != null) { + try { + htmlVersion = Double.parseDouble(explicitHtmlVersion); + } catch (NumberFormatException e) { + htmlVersion = 5.0; + } + } else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method)) + && outputProperties.getProperty(OutputKeys.VERSION) != null) { + try { + htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION)); + } catch (NumberFormatException e) { + htmlVersion = 5.0; + } + } else { + htmlVersion = 5.0; } final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion); diff --git a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java index 22ab6dfca23..d2b45fb3729 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java @@ -190,10 +190,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP } private void writeDouble(final DoubleValue item) throws SAXException { - final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US); - symbols.setExponentSeparator("e"); - final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols); - writeText(df.format(item.getDouble())); + final double d = item.getDouble(); + if (Double.isInfinite(d) || Double.isNaN(d)) { + writeText(item.getStringValue()); + } else { + final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US); + symbols.setExponentSeparator("e"); + final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols); + writeText(df.format(d)); + } } private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException { @@ -215,8 +220,6 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException { try { - writer.write("map"); - addSpaceIfIndent(); writer.write('{'); addIndent(); indent(); diff --git a/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java new file mode 100644 index 00000000000..37675a4e54e --- /dev/null +++ b/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java @@ -0,0 +1,297 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.util.serializer; + +import io.lacuna.bifurcan.IEntry; +import org.exist.storage.serializers.EXistOutputKeys; +import org.exist.xquery.XPathException; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.io.Writer; +import java.util.*; + +/** + * Serializes XDM sequences as RFC 4180 CSV output. + * + * Accepts three input formats: + *
    + *
  • Array of arrays: each inner array is a row
  • + *
  • Sequence of maps: keys become header, values become rows
  • + *
  • XML table: <csv><record><field>...</field></record></csv>
  • + *
+ */ +public class CSVSerializer { + + private final Properties outputProperties; + private final String fieldDelimiter; + private final String rowDelimiter; + private final char quoteChar; + private final boolean alwaysQuote; + private final boolean includeHeader; + + public CSVSerializer(final Properties outputProperties) { + this.outputProperties = outputProperties; + this.fieldDelimiter = outputProperties.getProperty(EXistOutputKeys.CSV_FIELD_DELIMITER, ","); + this.rowDelimiter = outputProperties.getProperty(EXistOutputKeys.CSV_ROW_DELIMITER, "\n"); + final String qc = outputProperties.getProperty(EXistOutputKeys.CSV_QUOTE_CHARACTER, "\""); + this.quoteChar = qc.isEmpty() ? '"' : qc.charAt(0); + this.alwaysQuote = !"no".equals(outputProperties.getProperty(EXistOutputKeys.CSV_QUOTES, "yes")); + this.includeHeader = "yes".equals(outputProperties.getProperty(EXistOutputKeys.CSV_HEADER, "no")); + } + + public void serialize(final Sequence sequence, final Writer writer) throws SAXException { + try { + if (sequence.isEmpty()) { + return; + } + + final Item first = sequence.itemAt(0); + + if (first.getType() == Type.ARRAY_ITEM) { + if (sequence.hasOne()) { + // Single array: treat as array-of-arrays + serializeArrayOfArrays((ArrayType) first, writer); + } else { + // Sequence of arrays: each array is a row + serializeSequenceOfArrays(sequence, writer); + } + } else if (first.getType() == Type.MAP_ITEM) { + serializeSequenceOfMaps(sequence, writer); + } else if (Type.subTypeOf(first.getType(), Type.NODE)) { + serializeXmlTable(sequence, writer); + } else { + // Single atomic or sequence of atomics — one row + serializeAtomicSequence(sequence, writer); + } + } catch (final IOException | XPathException e) { + throw new SAXException(e.getMessage(), e); + } + } + + private void serializeArrayOfArrays(final ArrayType outerArray, final Writer writer) throws IOException, XPathException { + for (int i = 0; i < outerArray.getSize(); i++) { + final Sequence member = outerArray.get(i); + if (member.getItemCount() == 1 && member.itemAt(0).getType() == Type.ARRAY_ITEM) { + writeRow((ArrayType) member.itemAt(0), writer); + } else { + writeSequenceRow(member, writer); + } + writer.write(rowDelimiter); + } + } + + private void serializeSequenceOfArrays(final Sequence sequence, final Writer writer) throws IOException, XPathException { + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.ARRAY_ITEM) { + writeRow((ArrayType) item, writer); + } else { + writer.write(quoteField(item.getStringValue())); + } + writer.write(rowDelimiter); + } + } + + private void serializeSequenceOfMaps(final Sequence sequence, final Writer writer) throws IOException, XPathException { + // Collect all keys from first map for header + final AbstractMapType firstMap = (AbstractMapType) sequence.itemAt(0); + final List keys = new ArrayList<>(); + for (final IEntry entry : firstMap) { + keys.add(entry.key().getStringValue()); + } + Collections.sort(keys); + + // Write header + if (includeHeader) { + writeFields(keys, writer); + writer.write(rowDelimiter); + } + + // Write rows + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.MAP_ITEM) { + final AbstractMapType map = (AbstractMapType) item; + boolean first = true; + for (final String key : keys) { + if (!first) { + writer.write(fieldDelimiter); + } + final Sequence value = map.get(new StringValue(key)); + writer.write(quoteField(value.isEmpty() ? "" : value.getStringValue())); + first = false; + } + } + writer.write(rowDelimiter); + } + } + + private void serializeXmlTable(final Sequence sequence, final Writer writer) throws IOException, XPathException { + // Walk XML table: value + // or
value
+ for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (Type.subTypeOf(item.getType(), Type.ELEMENT)) { + final org.w3c.dom.Element elem = (org.w3c.dom.Element) ((NodeValue) item).getNode(); + serializeXmlElement(elem, writer); + } + } + } + + private void serializeXmlElement(final org.w3c.dom.Element element, final Writer writer) throws IOException { + final org.w3c.dom.NodeList children = element.getChildNodes(); + boolean hasChildElements = false; + for (int i = 0; i < children.getLength(); i++) { + if (children.item(i).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + hasChildElements = true; + break; + } + } + + if (!hasChildElements) { + // Leaf element — output as a field value + writer.write(quoteField(element.getTextContent())); + return; + } + + // Check if children are "record" elements (containing field elements) + // or direct field elements + boolean firstRecord = true; + for (int i = 0; i < children.getLength(); i++) { + if (children.item(i).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + final org.w3c.dom.Element child = (org.w3c.dom.Element) children.item(i); + final org.w3c.dom.NodeList grandchildren = child.getChildNodes(); + boolean hasGrandchildElements = false; + for (int j = 0; j < grandchildren.getLength(); j++) { + if (grandchildren.item(j).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + hasGrandchildElements = true; + break; + } + } + + if (hasGrandchildElements) { + // This is a record element — its children are fields + if (!firstRecord) { + // row delimiter already written + } + boolean firstField = true; + for (int j = 0; j < grandchildren.getLength(); j++) { + if (grandchildren.item(j).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + if (!firstField) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(grandchildren.item(j).getTextContent())); + firstField = false; + } + } + writer.write(rowDelimiter); + firstRecord = false; + } else { + // Direct field element — accumulate as part of a single row + if (!firstRecord) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(child.getTextContent())); + firstRecord = false; + } + } + } + } + + private void serializeAtomicSequence(final Sequence sequence, final Writer writer) throws IOException, XPathException { + boolean first = true; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(i.nextItem().getStringValue())); + first = false; + } + writer.write(rowDelimiter); + } + + private void writeRow(final ArrayType array, final Writer writer) throws IOException, XPathException { + for (int i = 0; i < array.getSize(); i++) { + if (i > 0) { + writer.write(fieldDelimiter); + } + final Sequence member = array.get(i); + writer.write(quoteField(member.isEmpty() ? "" : member.getStringValue())); + } + } + + private void writeSequenceRow(final Sequence sequence, final Writer writer) throws IOException, XPathException { + boolean first = true; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(i.nextItem().getStringValue())); + first = false; + } + } + + private void writeFields(final List fields, final Writer writer) throws IOException { + boolean first = true; + for (final String field : fields) { + if (!first) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(field)); + first = false; + } + } + + /** + * Quote a field value per RFC 4180. + * If alwaysQuote is true, all fields are quoted. + * If false, only fields containing the delimiter, quote char, or newline are quoted. + * Quote characters within the value are escaped by doubling. + */ + private String quoteField(final String value) { + final boolean needsQuoting = alwaysQuote + || value.contains(fieldDelimiter) + || value.indexOf(quoteChar) >= 0 + || value.contains("\n") + || value.contains("\r"); + + if (!needsQuoting) { + return value; + } + + final StringBuilder sb = new StringBuilder(value.length() + 2); + sb.append(quoteChar); + for (int i = 0; i < value.length(); i++) { + final char c = value.charAt(i); + if (c == quoteChar) { + sb.append(quoteChar); // escape by doubling + } + sb.append(c); + } + sb.append(quoteChar); + return sb.toString(); + } +} diff --git a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java index 1dffc3029b7..bc69c4304c6 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java @@ -246,6 +246,23 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException { } } + @Override + public void processingInstruction(String target, String data) throws TransformerException { + try { + closeStartTag(false); + final Writer writer = getWriter(); + writer.write("'); + } catch (IOException e) { + throw new TransformerException(e.getMessage(), e); + } + } + @Override protected boolean needsEscape(char ch) { if (RAW_TEXT_ELEMENTS.contains(currentTag)) { @@ -253,4 +270,20 @@ protected boolean needsEscape(char ch) { } return super.needsEscape(ch); } + + @Override + protected boolean needsEscape(final char ch, final boolean inAttribute) { + // In raw text elements (script, style), suppress escaping for TEXT content only. + // Attribute values must always be escaped, even on raw text elements. + if (!inAttribute && RAW_TEXT_ELEMENTS.contains(currentTag)) { + return false; + } + // For attributes, always return true (bypass the 1-arg override + // which returns false for all script/style content) + if (inAttribute) { + return true; + } + return super.needsEscape(ch, inAttribute); + } + } diff --git a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java index c336d8b2943..db271981892 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java @@ -25,7 +25,9 @@ import java.io.Writer; import java.util.ArrayDeque; import java.util.Deque; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; @@ -48,6 +50,8 @@ public class IndentingXMLWriter extends XMLWriter { private boolean sameline = false; private boolean whitespacePreserve = false; private final Deque whitespacePreserveStack = new ArrayDeque<>(); + private Set suppressIndentation = null; + private int suppressIndentDepth = 0; public IndentingXMLWriter() { super(); @@ -75,6 +79,9 @@ public void startElement(final String namespaceURI, final String localName, fina indent(); } super.startElement(namespaceURI, localName, qname); + if (isSuppressIndentation(localName)) { + suppressIndentDepth++; + } addIndent(); afterTag = true; sameline = true; @@ -86,6 +93,9 @@ public void startElement(final QName qname) throws TransformerException { indent(); } super.startElement(qname); + if (isSuppressIndentation(qname.getLocalPart())) { + suppressIndentDepth++; + } addIndent(); afterTag = true; sameline = true; @@ -95,6 +105,9 @@ public void startElement(final QName qname) throws TransformerException { public void endElement(final String namespaceURI, final String localName, final String qname) throws TransformerException { endIndent(namespaceURI, localName); super.endElement(namespaceURI, localName, qname); + if (isSuppressIndentation(localName) && suppressIndentDepth > 0) { + suppressIndentDepth--; + } popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element sameline = isInlineTag(namespaceURI, localName); afterTag = true; @@ -104,6 +117,9 @@ public void endElement(final String namespaceURI, final String localName, final public void endElement(final QName qname) throws TransformerException { endIndent(qname.getNamespaceURI(), qname.getLocalPart()); super.endElement(qname); + if (isSuppressIndentation(qname.getLocalPart()) && suppressIndentDepth > 0) { + suppressIndentDepth--; + } popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element sameline = isInlineTag(qname.getNamespaceURI(), qname.getLocalPart()); afterTag = true; @@ -165,6 +181,27 @@ public void setOutputProperties(final Properties properties) { LOG.warn("Invalid indentation value: '{}'", option); } indent = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no")); + final String suppressProp = outputProperties.getProperty("suppress-indentation"); + if (suppressProp != null && !suppressProp.isEmpty()) { + suppressIndentation = new HashSet<>(); + for (final String name : suppressProp.split("\\s+")) { + if (!name.isEmpty()) { + // Handle URI-qualified names: Q{ns}local or {ns}local → extract local part + if (name.startsWith("Q{") || name.startsWith("{")) { + final int closeBrace = name.indexOf('}'); + if (closeBrace > 0 && closeBrace < name.length() - 1) { + suppressIndentation.add(name.substring(closeBrace + 1)); + } else { + suppressIndentation.add(name); + } + } else { + suppressIndentation.add(name); + } + } + } + } else { + suppressIndentation = null; + } } @Override @@ -220,8 +257,12 @@ protected void addSpaceIfIndent() throws IOException { writer.write(' '); } + private boolean isSuppressIndentation(final String localName) { + return suppressIndentation != null && suppressIndentation.contains(localName); + } + protected void indent() throws TransformerException { - if (!indent || whitespacePreserve) { + if (!indent || whitespacePreserve || suppressIndentDepth > 0) { return; } final int spaces = indentAmount * level; diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java index e89e7119d19..06cb6b9f9f5 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java @@ -128,7 +128,12 @@ protected void writeDoctype(String rootElement) throws TransformerException { return; } - documentType("html", null, null); + // Pass through doctype-public and doctype-system if set + final String publicId = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC) : null; + final String systemId = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM) : null; + documentType("html", publicId, systemId); doctypeWritten = true; } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java index b0006f7f51c..0112aceb5cf 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.Writer; +import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; @@ -36,12 +37,35 @@ */ public class XHTMLWriter extends IndentingXMLWriter { + /** + * HTML boolean attributes per HTML 4.01 and HTML5 spec. + * When method="html" and the attribute value equals the attribute name + * (case-insensitive), the attribute is minimized to just the name. + */ + protected static final ObjectSet BOOLEAN_ATTRIBUTES = new ObjectOpenHashSet<>(31); + static { + BOOLEAN_ATTRIBUTES.add("checked"); + BOOLEAN_ATTRIBUTES.add("compact"); + BOOLEAN_ATTRIBUTES.add("declare"); + BOOLEAN_ATTRIBUTES.add("defer"); + BOOLEAN_ATTRIBUTES.add("disabled"); + BOOLEAN_ATTRIBUTES.add("ismap"); + BOOLEAN_ATTRIBUTES.add("multiple"); + BOOLEAN_ATTRIBUTES.add("nohref"); + BOOLEAN_ATTRIBUTES.add("noresize"); + BOOLEAN_ATTRIBUTES.add("noshade"); + BOOLEAN_ATTRIBUTES.add("nowrap"); + BOOLEAN_ATTRIBUTES.add("readonly"); + BOOLEAN_ATTRIBUTES.add("selected"); + } + protected static final ObjectSet EMPTY_TAGS = new ObjectOpenHashSet<>(31); static { EMPTY_TAGS.add("area"); EMPTY_TAGS.add("base"); EMPTY_TAGS.add("br"); EMPTY_TAGS.add("col"); + EMPTY_TAGS.add("embed"); EMPTY_TAGS.add("hr"); EMPTY_TAGS.add("img"); EMPTY_TAGS.add("input"); @@ -88,6 +112,8 @@ public class XHTMLWriter extends IndentingXMLWriter { } protected String currentTag; + protected boolean inHead = false; + protected boolean contentTypeMetaWritten = false; protected final ObjectSet emptyTags; protected final ObjectSet inlineTags; @@ -120,6 +146,13 @@ public XHTMLWriter(final Writer writer, ObjectSet emptyTags, ObjectSet"); + // For method="html", use HTML-style void tags (
) + // For method="xhtml", use XHTML-style (
) + if (isHtmlMethod()) { + getWriter().write(">"); + } else { + getWriter().write(" />"); + } } else { getWriter().write('>'); getWriter().write(") while XHTML uses self-closing (
). + */ + private boolean isHtmlMethod() { + if (outputProperties != null) { + final String method = outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD); + return "html".equalsIgnoreCase(method); + } + return false; + } + + /** + * Returns true if the HTML version is 5.0 or higher. + */ + private boolean isHtml5Version() { + if (outputProperties == null) { + return true; // default to HTML5 + } + final String version = outputProperties.getProperty(OutputKeys.VERSION); + if (version != null) { + try { + return Double.parseDouble(version) >= 5.0; + } catch (final NumberFormatException e) { + // ignore + } + } + return true; // default to HTML5 + } + @Override + public void attribute(final QName qname, final CharSequence value) throws TransformerException { + // For method="html", minimize boolean attributes when value matches name + if (isHtmlMethod() && isBooleanAttribute(qname.getLocalPart(), value)) { + try { + if (!tagIsOpen) { + characters(value); + return; + } + final Writer w = getWriter(); + w.write(' '); + w.write(qname.getLocalPart()); + // Don't write ="value" — minimized form + } catch (final IOException ioe) { + throw new TransformerException(ioe.getMessage(), ioe); + } + return; + } + super.attribute(qname, value); + } + + @Override + public void attribute(final String qname, final CharSequence value) throws TransformerException { + if (isHtmlMethod() && isBooleanAttribute(qname, value)) { + try { + if (!tagIsOpen) { + characters(value); + return; + } + final Writer w = getWriter(); + w.write(' '); + w.write(qname); + } catch (final IOException ioe) { + throw new TransformerException(ioe.getMessage(), ioe); + } + return; + } + super.attribute(qname, value); + } + + private boolean isBooleanAttribute(final String attrName, final CharSequence value) { + return BOOLEAN_ATTRIBUTES.contains(attrName.toLowerCase(java.util.Locale.ROOT)) + && attrName.equalsIgnoreCase(value.toString()); + } + + private static final ObjectSet RAW_TEXT_ELEMENTS_HTML = new ObjectOpenHashSet<>(4); + static { + RAW_TEXT_ELEMENTS_HTML.add("script"); + RAW_TEXT_ELEMENTS_HTML.add("style"); + } + + @Override + protected boolean needsEscape(final char ch, final boolean inAttribute) { + // For HTML method, script and style content should not be escaped + if (!inAttribute && isHtmlMethod() + && currentTag != null && RAW_TEXT_ELEMENTS_HTML.contains(currentTag.toLowerCase(java.util.Locale.ROOT))) { + return false; + } + return super.needsEscape(ch, inAttribute); + } + + /** + * For HTML serialization, cdata-section-elements is ignored per the + * W3C serialization spec — CDATA sections are not valid in HTML. + */ + @Override + protected boolean shouldUseCdataSections() { + if (isHtmlMethod()) { + return false; + } + return super.shouldUseCdataSections(); + } + + @Override + protected boolean escapeAmpersandBeforeBrace() { + // HTML spec: & before { in attribute values should not be escaped + return false; + } + @Override protected boolean isInlineTag(final String namespaceURI, final String localName) { return (namespaceURI == null || namespaceURI.isEmpty() || Namespaces.XHTML_NS.equals(namespaceURI)) && inlineTags.contains(localName); } + + /** + * Write a meta content-type tag as the first child of head when + * include-content-type is enabled (the default per W3C Serialization 3.1). + */ + protected void writeContentTypeMeta() throws TransformerException { + if (contentTypeMetaWritten || outputProperties == null) { + return; + } + final String includeContentType = outputProperties.getProperty("include-content-type", "yes"); + if (!"yes".equals(includeContentType)) { + return; + } + contentTypeMetaWritten = true; + try { + final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8"); + closeStartTag(false); + final Writer writer = getWriter(); + + // HTML5 method uses + // XHTML and HTML4 use + if (isHtmlMethod() && isHtml5Version()) { + writer.write(""); + } else { + final String mediaType = outputProperties.getProperty(OutputKeys.MEDIA_TYPE, "text/html"); + writer.write(""); + } + } catch (IOException e) { + throw new TransformerException(e.getMessage(), e); + } + } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java index 763aaf52ef6..bebc075d316 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java @@ -88,6 +88,24 @@ public class XMLWriter implements SerializerWriter { private boolean xdmSerialization = false; private final Deque elementName = new ArrayDeque<>(); + + /** + * Returns true if cdata-section-elements should be applied. + * Subclasses (e.g., XHTMLWriter for HTML method) can override + * to suppress CDATA sections. + */ + protected boolean shouldUseCdataSections() { + return xdmSerialization; + } + + /** + * Returns the namespace URI of the current (innermost) element, + * or null if no element is on the stack. + */ + protected String currentElementNamespaceURI() { + final QName top = elementName.peek(); + return top != null ? top.getNamespaceURI() : null; + } private LazyVal> cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames); private boolean cdataSetionElement = false; @@ -96,7 +114,7 @@ public class XMLWriter implements SerializerWriter { Arrays.fill(textSpecialChars, false); textSpecialChars['<'] = true; textSpecialChars['>'] = true; - // textSpecialChars['\r'] = true; + textSpecialChars['\r'] = true; textSpecialChars['&'] = true; attrSpecialChars = new boolean[128]; @@ -373,12 +391,35 @@ public void characters(final CharSequence chars) throws TransformerException { if(tagIsOpen) { closeStartTag(false); } - writeChars(chars, false); + // When xdmSerialization is active and current element is in cdata-section-elements, + // wrap text content in CDATA instead of escaping it (per W3C Serialization 3.1) + if (shouldUseCdataSections() && !elementName.isEmpty() + && cdataSectionElements.get().contains(elementName.peek())) { + writeCdataContent(chars); + } else { + writeChars(chars, false); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } } + private void writeCdataContent(final CharSequence chars) throws IOException { + // CDATA sections cannot contain "]]>", so split at those boundaries + final String s = chars.toString(); + int start = 0; + int idx; + while ((idx = s.indexOf("]]>", start)) != -1) { + writer.write(""); + start = idx + 2; // continue from ">" + } + writer.write(""); + } + public void characters(final char[] ch, final int start, final int len) throws TransformerException { if(!declarationWritten) { writeDeclaration(); @@ -545,11 +586,13 @@ protected void writeDeclaration() throws TransformerException { } final String omitXmlDecl = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); - if ("no".equals(omitXmlDecl)) { + @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE); + // Per W3C Serialization 3.1: output declaration if omit-xml-declaration is false/no/0, + // or if standalone is explicitly set (the declaration is required to carry standalone) + if (isBooleanFalse(omitXmlDecl) || standalone != null) { // get the fields of the declaration from the serialization properties final String version = outputProperties.getProperty(OutputKeys.VERSION, DEFAULT_XML_VERSION); final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, DEFAULT_XML_ENCODING); - @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE); writeDeclaration(version, encoding, standalone); } @@ -589,6 +632,37 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio protected boolean needsEscape(final char ch) { return true; } + + /** + * Whether & before { should be escaped. HTML output returns false + * per W3C HTML serialization spec. XML output returns true (always escape &). + */ + protected boolean escapeAmpersandBeforeBrace() { + return true; + } + + /** + * Check if a serialization boolean parameter value is false. + * W3C Serialization 3.1 accepts "no", "false", "0" (with optional whitespace) as false. + */ + protected static boolean isBooleanFalse(final String value) { + if (value == null) { + return false; + } + final String trimmed = value.trim(); + return "no".equals(trimmed) || "false".equals(trimmed) || "0".equals(trimmed); + } + + /** + * Whether the given character needs escaping. Subclasses can override + * to suppress escaping for specific contexts (e.g., HTML raw text elements). + * + * @param ch the character to check + * @param inAttribute true if we're writing an attribute value + */ + protected boolean needsEscape(final char ch, final boolean inAttribute) { + return needsEscape(ch); + } protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException { final boolean[] specialChars = inAttribute ? attrSpecialChars : textSpecialChars; @@ -607,6 +681,12 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw } } else if(!charSet.inCharacterSet(ch)) { break; + } else if(ch >= 0x7F && ch <= 0x9F) { + // Control chars 0x7F-0x9F must be serialized as character references + break; + } else if(ch == 0x2028) { + // LINE SEPARATOR must be serialized as character reference + break; } else { i++; } @@ -618,7 +698,7 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw return; } - if(needsEscape(ch)) { + if(needsEscape(ch, inAttribute)) { switch(ch) { case '<': writer.write("<"); @@ -627,7 +707,12 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw writer.write(">"); break; case '&': - writer.write("&"); + // HTML spec: & before { in attribute values should not be escaped + if (inAttribute && i + 1 < len && s.charAt(i + 1) == '{' && !escapeAmpersandBeforeBrace()) { + writer.write('&'); + } else { + writer.write("&"); + } break; case '\r': writer.write(" "); diff --git a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java index 366e3866cbc..7a8d5f77ce8 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java @@ -32,6 +32,7 @@ import org.xml.sax.SAXNotSupportedException; import javax.xml.transform.OutputKeys; +import java.io.IOException; import java.io.Writer; import java.util.Properties; @@ -70,19 +71,75 @@ public void serialize(final Sequence sequence, final int start, final int howman case "json": serializeJSON(sequence, compilationTime, executionTime); break; + case "csv": + serializeCSV(sequence); + break; case "xml": default: - serializeXML(sequence, start, howmany, wrap, typed, compilationTime, executionTime); + // For XML/text methods, flatten any arrays in the sequence before serialization + // (arrays can't be serialized as SAX events directly) + final Sequence flattened = flattenArrays(sequence); + if (flattened != sequence) { + // Flattening changed the sequence — reset start/howmany to cover all items. + // For text method, default item-separator is space if not explicitly set. + if ("text".equals(method) && outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR) == null) { + outputProperties.setProperty(EXistOutputKeys.ITEM_SEPARATOR, " "); + } + serializeXML(flattened, 1, flattened.getItemCount(), wrap, typed, compilationTime, executionTime); + } else { + serializeXML(flattened, start, howmany, wrap, typed, compilationTime, executionTime); + } break; } } + /** + * Flatten arrays in a sequence — each array member becomes a top-level item. + * This is needed because the SAX-based XML/text serializer can't handle ArrayType items. + */ + private static Sequence flattenArrays(final Sequence sequence) throws XPathException { + boolean hasArrays = false; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (i.nextItem().getType() == Type.ARRAY_ITEM) { + hasArrays = true; + break; + } + } + if (!hasArrays) { + return sequence; + } + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.ARRAY_ITEM) { + final Sequence flat = org.exist.xquery.functions.array.ArrayType.flatten(item); + for (final SequenceIterator fi = flat.iterate(); fi.hasNext(); ) { + result.add(fi.nextItem()); + } + } else { + result.add(item); + } + } + return result; + } + public boolean normalize() { final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml"); - return !("json".equals(method) || "adaptive".equals(method)); + return !("json".equals(method) || "adaptive".equals(method) || "csv".equals(method)); } private void serializeXML(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException { + final String itemSeparator = outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR); + // If item-separator is set and sequence has multiple items, serialize items individually + // with separator between them (the internal Serializer doesn't handle item-separator) + if (itemSeparator != null && sequence.getItemCount() > 1 && !wrap) { + serializeXMLWithItemSeparator(sequence, start, howmany, typed, itemSeparator); + } else { + serializeXMLDirect(sequence, start, howmany, wrap, typed, compilationTime, executionTime); + } + } + + private void serializeXMLDirect(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException { final Serializer serializer = broker.borrowSerializer(); SAXSerializer sax = null; try { @@ -102,17 +159,53 @@ private void serializeXML(final Sequence sequence, final int start, final int ho } } + private void serializeXMLWithItemSeparator(final Sequence sequence, final int start, final int howmany, final boolean typed, final String itemSeparator) throws SAXException, XPathException { + final int actualStart = start - 1; // convert 1-based to 0-based + final int end = Math.min(actualStart + howmany, sequence.getItemCount()); + for (int i = actualStart; i < end; i++) { + if (i > actualStart) { + try { + writer.write(itemSeparator); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + final Item item = sequence.itemAt(i); + if (item == null) { + continue; + } + if (Type.subTypeOf(item.getType(), Type.NODE)) { + final ValueSequence singleItem = new ValueSequence(1); + singleItem.add(item); + serializeXMLDirect(singleItem, 1, 1, false, typed, 0, 0); + } else { + try { + writer.write(item.getStringValue()); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + } + } + private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException { - // backwards compatibility: if the sequence contains a single element, we assume - // it should be transformed to JSON following the rules of the old JSON writer + // Backwards compatibility: if the sequence contains a single element or document, + // use the legacy XML-to-JSON writer (which converts XML structure to JSON properties). + // This is needed for RESTXQ and REST API which return XML documents with method=json. + // Maps, arrays, atomics, and multi-item sequences go through the W3C-compliant JSONSerializer. if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) { - serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime); + serializeXMLDirect(sequence, 1, 1, false, false, compilationTime, executionTime); } else { JSONSerializer serializer = new JSONSerializer(broker, outputProperties); serializer.serialize(sequence, writer); } } + private void serializeCSV(final Sequence sequence) throws SAXException { + final CSVSerializer serializer = new CSVSerializer(outputProperties); + serializer.serialize(sequence, writer); + } + private void serializeAdaptive(final Sequence sequence) throws SAXException, XPathException { final AdaptiveSerializer serializer = new AdaptiveSerializer(broker); serializer.setOutput(writer, outputProperties); diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java index bd1f01a9454..3fc36dd3f4d 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java @@ -23,10 +23,12 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.json.JsonWriteFeature; import io.lacuna.bifurcan.IEntry; import org.exist.storage.DBBroker; import org.exist.storage.serializers.EXistOutputKeys; import org.exist.storage.serializers.Serializer; +import org.exist.xquery.ErrorCodes; import org.exist.xquery.XPathException; import org.exist.xquery.functions.array.ArrayType; import org.exist.xquery.functions.map.MapType; @@ -36,40 +38,55 @@ import javax.xml.transform.OutputKeys; import java.io.IOException; import java.io.Writer; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; /** * Called by {@link org.exist.util.serializer.XQuerySerializer} to serialize an XQuery sequence * to JSON. The JSON serializer differs from other serialization methods because it maps XQuery * data items to JSON. * + * Per W3C XSLT and XQuery Serialization 3.1 Section 10 (JSON Output Method). + * * @author Wolf */ public class JSONSerializer { private final DBBroker broker; private final Properties outputProperties; + private final boolean allowDuplicateNames; public JSONSerializer(DBBroker broker, Properties outputProperties) { super(); this.broker = broker; this.outputProperties = outputProperties; + this.allowDuplicateNames = "yes".equals( + outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes")); } public void serialize(Sequence sequence, Writer writer) throws SAXException { - JsonFactory factory = new JsonFactory(); + // QT4: escape-solidus controls whether / is escaped as \/ (default: true) + final boolean escapeSolidus = !isBooleanFalse( + outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "yes")); + final JsonFactory factory = JsonFactory.builder() + .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus) + .build(); try { JsonGenerator generator = factory.createGenerator(writer); generator.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); if ("yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"))) { generator.useDefaultPrettyPrinter(); } - if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) { - generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + // Duplicate detection is handled manually in serializeMap for proper SERE0022 errors + generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + final boolean jsonLines = isBooleanTrue( + outputProperties.getProperty(EXistOutputKeys.JSON_LINES, "no")); + if (jsonLines) { + serializeJsonLines(sequence, generator); } else { - generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + serializeSequence(sequence, generator); } - serializeSequence(sequence, generator); if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.INSERT_FINAL_NEWLINE, "no"))) { generator.writeRaw('\n'); } @@ -79,12 +96,55 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException { } } + /** + * JSON Lines format (NDJSON): one JSON value per line, no array wrapper. + * Per QT4 Serialization 4.0, when json-lines=true. + */ + private void serializeJsonLines(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException { + if (sequence.isEmpty()) { + return; + } + // Each line must be a separate root-level value. Jackson adds separator + // whitespace between root values, so we serialize each item to a string + // and concatenate with newlines. + final boolean escapeSolidus = !isBooleanFalse( + outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "yes")); + boolean first = true; + for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + generator.writeRaw('\n'); + } + // Serialize this item to a standalone string + final java.io.StringWriter lineWriter = new java.io.StringWriter(); + final JsonFactory lineFactory = JsonFactory.builder() + .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus) + .build(); + final JsonGenerator lineGen = lineFactory.createGenerator(lineWriter); + lineGen.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); + serializeItem(i.nextItem(), lineGen); + lineGen.close(); + // Write the line's JSON as raw content to avoid Jackson's root separator + generator.writeRaw(lineWriter.toString()); + first = false; + } + } + private void serializeSequence(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException { + serializeSequence(sequence, generator, false); + } + + private void serializeSequence(Sequence sequence, JsonGenerator generator, boolean allowMultiItem) throws IOException, XPathException, SAXException { if (sequence.isEmpty()) { generator.writeNull(); } else if (sequence.hasOne() && "no".equals(outputProperties.getProperty(EXistOutputKeys.JSON_ARRAY_OUTPUT, "no"))) { serializeItem(sequence.itemAt(0), generator); + } else if (!allowMultiItem) { + // SERE0023: JSON output method cannot serialize a sequence of more than one item + // at the top level or as a map entry value + throw new SAXException("err:SERE0023 Sequence of " + sequence.getItemCount() + + " items cannot be serialized using the JSON output method"); } else { + // Inside arrays, multi-item sequences become JSON arrays generator.writeStartArray(); for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) { serializeItem(i.nextItem(), generator); @@ -99,23 +159,52 @@ private void serializeItem(Item item, JsonGenerator generator) throws IOExceptio } else if (item.getType() == Type.MAP_ITEM) { serializeMap((MapType) item, generator); } else if (Type.subTypeOf(item.getType(), Type.ANY_ATOMIC_TYPE)) { - if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { - generator.writeNumber(item.getStringValue()); - } else { - switch (item.getType()) { - case Type.BOOLEAN: - generator.writeBoolean(((AtomicValue)item).effectiveBooleanValue()); - break; - default: - generator.writeString(item.getStringValue()); - break; - } - } + serializeAtomicValue(item, generator); } else if (Type.subTypeOf(item.getType(), Type.NODE)) { serializeNode(item, generator); + } else if (Type.subTypeOf(item.getType(), Type.FUNCTION)) { + throw new SAXException("err:SERE0021 Sequence contains a function item, which cannot be serialized as JSON"); } } + private void serializeAtomicValue(Item item, JsonGenerator generator) throws IOException, XPathException, SAXException { + if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { + final String stringValue = item.getStringValue(); + // Handle special float/double values per W3C Serialization + if ("NaN".equals(stringValue)) { + // QT4: NaN serializes as JSON null + generator.writeNull(); + } else if ("INF".equals(stringValue)) { + // QT4: +INF serializes as 1e9999 + generator.writeRawValue("1e9999"); + } else if ("-INF".equals(stringValue)) { + // QT4: -INF serializes as -1e9999 + generator.writeRawValue("-1e9999"); + } else if ("-0".equals(stringValue)) { + // Negative zero: write as 0 (QT4 allows either 0 or -0) + generator.writeNumber(stringValue); + } else { + generator.writeNumber(stringValue); + } + } else if (item.getType() == Type.BOOLEAN) { + generator.writeBoolean(((AtomicValue) item).effectiveBooleanValue()); + } else { + generator.writeString(item.getStringValue()); + } + } + + private static boolean isBooleanTrue(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + + private static boolean isBooleanFalse(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "no".equals(v) || "false".equals(v) || "0".equals(v); + } + private void serializeNode(Item item, JsonGenerator generator) throws SAXException { final Serializer serializer = broker.borrowSerializer(); final Properties xmlOutput = new Properties(); @@ -136,16 +225,22 @@ private void serializeArray(ArrayType array, JsonGenerator generator) throws IOE generator.writeStartArray(); for (int i = 0; i < array.getSize(); i++) { final Sequence member = array.get(i); - serializeSequence(member, generator); + // Array members can be multi-item sequences — each becomes a nested JSON array + serializeSequence(member, generator, true); } generator.writeEndArray(); } private void serializeMap(MapType map, JsonGenerator generator) throws IOException, XPathException, SAXException { generator.writeStartObject(); + final Set seenKeys = allowDuplicateNames ? null : new HashSet<>(); for (final IEntry entry: map) { - generator.writeFieldName(entry.key().getStringValue()); - serializeSequence(entry.value(), generator); + final String key = entry.key().getStringValue(); + if (seenKeys != null && !seenKeys.add(key)) { + throw new SAXException("err:SERE0022 Duplicate key '" + key + "' in map and allow-duplicate-names is 'no'"); + } + generator.writeFieldName(key); + serializeSequence(entry.value(), generator, false); } generator.writeEndObject(); } diff --git a/exist-core/src/main/java/org/exist/xquery/CastExpression.java b/exist-core/src/main/java/org/exist/xquery/CastExpression.java index 8911c5c6144..3c08eb19a69 100644 --- a/exist-core/src/main/java/org/exist/xquery/CastExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CastExpression.java @@ -84,13 +84,15 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } } - // Should be handled by the parser - if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) { + // XPST0080: cannot cast to abstract or special types + if (requiredType == Type.ANY_ATOMIC_TYPE || requiredType == Type.ANY_SIMPLE_TYPE + || requiredType == Type.ANY_TYPE || requiredType == Type.UNTYPED + || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) { throw new XPathException(this, ErrorCodes.XPST0080, "cannot cast to " + Type.getTypeName(requiredType)); } - if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) { - throw new XPathException(this, ErrorCodes.XPST0051, "cannot cast to " + Type.getTypeName(requiredType)); + if (expression.returnsType() == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.UNTYPED) { + throw new XPathException(this, ErrorCodes.XPST0051, "cannot cast from " + Type.getTypeName(expression.returnsType())); } final Sequence result; diff --git a/exist-core/src/main/java/org/exist/xquery/CastableExpression.java b/exist-core/src/main/java/org/exist/xquery/CastableExpression.java index 9a0769f9653..0dc465c049f 100644 --- a/exist-core/src/main/java/org/exist/xquery/CastableExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CastableExpression.java @@ -93,11 +93,13 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());} } - if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) + if (requiredType == Type.ANY_ATOMIC_TYPE || requiredType == Type.ANY_SIMPLE_TYPE + || requiredType == Type.ANY_TYPE || requiredType == Type.UNTYPED + || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) {throw new XPathException(this, ErrorCodes.XPST0080, "cannot convert to " + Type.getTypeName(requiredType));} - if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) - {throw new XPathException(this, ErrorCodes.XPST0051, "cannot convert to " + Type.getTypeName(requiredType));} + if (expression.returnsType() == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.UNTYPED) + {throw new XPathException(this, ErrorCodes.XPST0051, "cannot convert from " + Type.getTypeName(expression.returnsType()));} Sequence result; //See : http://article.gmane.org/gmane.text.xml.xquery.general/1413 diff --git a/exist-core/src/main/java/org/exist/xquery/ChoiceCastExpression.java b/exist-core/src/main/java/org/exist/xquery/ChoiceCastExpression.java new file mode 100644 index 00000000000..1f58834103f --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ChoiceCastExpression.java @@ -0,0 +1,137 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Implements cast as (T1 | T2 | ...) from XQuery 4.0. + * Tries each target type in order and returns the first successful cast. + */ +public class ChoiceCastExpression extends AbstractExpression { + + private final int[] targetTypes; + private final Cardinality cardinality; + private Expression expression; + + public ChoiceCastExpression(final XQueryContext context, final Expression expr, + final int[] targetTypes, final Cardinality cardinality) { + super(context); + this.targetTypes = targetTypes; + this.cardinality = cardinality; + this.expression = expr; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = Atomize.atomize(expression.eval(contextSequence, contextItem)); + if (seq.isEmpty()) { + if (cardinality.atLeastOne()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Type error: empty sequence is not allowed here"); + } + return Sequence.EMPTY_SEQUENCE; + } + if (seq.hasMany()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "cardinality error: sequence with more than one item is not allowed here"); + } + + final Item item = seq.itemAt(0); + XPathException lastError = null; + + for (final int targetType : targetTypes) { + try { + return item.convertTo(targetType); + } catch (final XPathException e) { + lastError = e; + } + } + + throw new XPathException(this, ErrorCodes.FORG0001, + "Cannot cast " + Type.getTypeName(item.getType()) + + " to any of the choice types", lastError); + } + + @Override + public int returnsType() { + return Type.ANY_ATOMIC_TYPE; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_ONE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + expression.dump(dumper); + dumper.display(" cast as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + dumper.display(" | "); + } + dumper.display(Type.getTypeName(targetTypes[i])); + } + dumper.display(")"); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(expression.toString()).append(" cast as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + sb.append(" | "); + } + sb.append(Type.getTypeName(targetTypes[i])); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public int getDependencies() { + return expression.getDependencies() | Dependency.CONTEXT_ITEM; + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } + +} diff --git a/exist-core/src/main/java/org/exist/xquery/ChoiceCastableExpression.java b/exist-core/src/main/java/org/exist/xquery/ChoiceCastableExpression.java new file mode 100644 index 00000000000..4d867b21e44 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ChoiceCastableExpression.java @@ -0,0 +1,128 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Implements castable as (T1 | T2 | ...) from XQuery 4.0. + * Returns true if the value can be cast to any of the target types. + */ +public class ChoiceCastableExpression extends AbstractExpression { + + private final int[] targetTypes; + private final Cardinality requiredCardinality; + private final Expression expression; + + public ChoiceCastableExpression(final XQueryContext context, final Expression expr, + final int[] targetTypes, final Cardinality requiredCardinality) { + super(context); + this.expression = expr; + this.targetTypes = targetTypes; + this.requiredCardinality = requiredCardinality; + } + + @Override + public int returnsType() { + return Type.BOOLEAN; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.EXACTLY_ONE; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = Atomize.atomize(expression.eval(contextSequence, contextItem)); + if (seq.isEmpty()) { + return BooleanValue.valueOf( + requiredCardinality.isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE)); + } + if (!requiredCardinality.isSuperCardinalityOrEqualOf(seq.getCardinality())) { + return BooleanValue.FALSE; + } + + final Item item = seq.itemAt(0); + for (final int targetType : targetTypes) { + try { + item.convertTo(targetType); + return BooleanValue.TRUE; + } catch (final XPathException e) { + // try next type + } + } + return BooleanValue.FALSE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + expression.dump(dumper); + dumper.display(" castable as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + dumper.display(" | "); + } + dumper.display(Type.getTypeName(targetTypes[i])); + } + dumper.display(")"); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(expression.toString()).append(" castable as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + sb.append(" | "); + } + sb.append(Type.getTypeName(targetTypes[i])); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public int getDependencies() { + return Dependency.CONTEXT_SET + Dependency.CONTEXT_ITEM; + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/Constants.java b/exist-core/src/main/java/org/exist/xquery/Constants.java index 7a5069d7416..62f16a2d304 100644 --- a/exist-core/src/main/java/org/exist/xquery/Constants.java +++ b/exist-core/src/main/java/org/exist/xquery/Constants.java @@ -46,7 +46,11 @@ public interface Constants { "following-sibling", "namespace", "self", - "attribute-descendant" + "attribute-descendant", + "following-or-self", + "preceding-or-self", + "following-sibling-or-self", + "preceding-sibling-or-self" }; /** @@ -73,6 +77,12 @@ public interface Constants { //combines /descendant-or-self::node()/attribute:* int DESCENDANT_ATTRIBUTE_AXIS = 13; + /** XQuery 4.0 axes */ + int FOLLOWING_OR_SELF_AXIS = 14; + int PRECEDING_OR_SELF_AXIS = 15; + int FOLLOWING_SIBLING_OR_SELF_AXIS = 16; + int PRECEDING_SIBLING_OR_SELF_AXIS = 17; + /** * Node types */ diff --git a/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java b/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java index 5accad4503e..39cab3d7d42 100644 --- a/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java +++ b/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java @@ -82,7 +82,14 @@ else if (seq.hasMany()) error.addArgs(ExpressionDumper.dump(expression), requiredCardinality.getHumanDescription(), seq.getItemCount()); - throw new XPathException(this, error.toString()); + final String errCode = error.getErrorCode(); + final ErrorCodes.ErrorCode xpathErrCode; + if ("XPDY0050".equals(errCode)) { + xpathErrCode = ErrorCodes.XPDY0050; + } else { + xpathErrCode = ErrorCodes.XPTY0004; + } + throw new XPathException(this, xpathErrCode, error.toString()); } if (context.getProfiler().isEnabled()) {context.getProfiler().end(this, "", seq);} diff --git a/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java b/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java index 1f32cbca2a8..5395fc7d1d3 100644 --- a/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java +++ b/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java @@ -35,11 +35,17 @@ public class DynamicTypeCheck extends AbstractExpression { final private Expression expression; final private int requiredType; - + final private ErrorCodes.ErrorCode errorCode; + public DynamicTypeCheck(XQueryContext context, int requiredType, Expression expr) { + this(context, requiredType, expr, null); + } + + public DynamicTypeCheck(XQueryContext context, int requiredType, Expression expr, ErrorCodes.ErrorCode errorCode) { super(context); this.requiredType = requiredType; this.expression = expr; + this.errorCode = errorCode; } /* (non-Javadoc) @@ -73,6 +79,10 @@ else if (!seq.isEmpty()) { return result == null ? seq : result; } + private ErrorCodes.ErrorCode effectiveErrorCode() { + return errorCode != null ? errorCode : ErrorCodes.XPTY0004; + } + private void check(Sequence result, Item item) throws XPathException { int type = item.getType(); if (type == Type.NODE && @@ -82,6 +92,12 @@ private void check(Sequence result, Item item) throws XPathException { //Retrieve the actual node {type= ((NodeProxy) item).getNode().getNodeType();} } + // Record types: maps can satisfy record types structurally + if (requiredType == Type.RECORD && Type.subTypeOf(type, Type.MAP_ITEM)) { + // Let SequenceType.checkRecordType() handle structural validation + if (result != null) { result.add(item); } + return; + } if(type != requiredType && !Type.subTypeOf(type, requiredType)) { //TODO : how to make this block more generic ? -pb if (type == Type.UNTYPED_ATOMIC) { @@ -89,7 +105,7 @@ private void check(Sequence result, Item item) throws XPathException { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } @@ -103,7 +119,7 @@ private void check(Sequence result, Item item) throws XPathException { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } @@ -116,7 +132,7 @@ private void check(Sequence result, Item item) throws XPathException { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } @@ -128,7 +144,7 @@ private void check(Sequence result, Item item) throws XPathException { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } @@ -141,12 +157,12 @@ private void check(Sequence result, Item item) throws XPathException { type = Type.STRING; } else { if (!(Type.subTypeOf(type, requiredType))) { - throw new XPathException(expression, ErrorCodes.XPTY0004, + throw new XPathException(expression, effectiveErrorCode(), Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ") is not a sub-type of " + Type.getTypeName(requiredType)); } else - {throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + {throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'");} } diff --git a/exist-core/src/main/java/org/exist/xquery/EnumCastExpression.java b/exist-core/src/main/java/org/exist/xquery/EnumCastExpression.java new file mode 100644 index 00000000000..bf0fc6ce7b2 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/EnumCastExpression.java @@ -0,0 +1,141 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Implements cast as enum("a","b","c") and castable as enum("a","b","c") from XQuery 4.0. + */ +public class EnumCastExpression extends AbstractExpression { + + private final String[] enumValues; + private final Cardinality cardinality; + private final Expression expression; + private final boolean isCastable; + + public EnumCastExpression(final XQueryContext context, final Expression expr, + final String[] enumValues, final Cardinality cardinality, + final boolean isCastable) { + super(context); + this.expression = expr; + this.enumValues = enumValues; + this.cardinality = cardinality; + this.isCastable = isCastable; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = Atomize.atomize(expression.eval(contextSequence, contextItem)); + + if (seq.isEmpty()) { + if (isCastable) { + return BooleanValue.valueOf( + cardinality.isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE)); + } + if (cardinality.atLeastOne()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Type error: empty sequence is not allowed here"); + } + return Sequence.EMPTY_SEQUENCE; + } + + final String value = seq.itemAt(0).getStringValue(); + + for (final String enumVal : enumValues) { + if (enumVal.equals(value)) { + if (isCastable) { + return BooleanValue.TRUE; + } + return new StringValue(this, value); + } + } + + if (isCastable) { + return BooleanValue.FALSE; + } + throw new XPathException(this, ErrorCodes.FORG0001, + "Cannot cast '" + value + "' to enum type"); + } + + @Override + public int returnsType() { + return isCastable ? Type.BOOLEAN : Type.STRING; + } + + @Override + public Cardinality getCardinality() { + return isCastable ? Cardinality.EXACTLY_ONE : Cardinality.ZERO_OR_ONE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + expression.dump(dumper); + dumper.display(isCastable ? " castable as enum(" : " cast as enum("); + for (int i = 0; i < enumValues.length; i++) { + if (i > 0) { + dumper.display(", "); + } + dumper.display("\"" + enumValues[i] + "\""); + } + dumper.display(")"); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(expression.toString()).append(isCastable ? " castable as enum(" : " cast as enum("); + for (int i = 0; i < enumValues.length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append("\"").append(enumValues[i]).append("\""); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public int getDependencies() { + return expression.getDependencies() | Dependency.CONTEXT_ITEM; + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java b/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java index 23226a155f2..a137a093137 100644 --- a/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java +++ b/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java @@ -176,6 +176,9 @@ public class ErrorCodes { public static final ErrorCode FORX0002 = new W3CErrorCode("FORX0002", "Invalid regular expression."); public static final ErrorCode FORX0003 = new W3CErrorCode("FORX0003", "Regular expression matches zero-length string."); public static final ErrorCode FORX0004 = new W3CErrorCode("FORX0004", "Invalid replacement string."); + public static final ErrorCode FOCV0001 = new W3CErrorCode("FOCV0001", "CSV quote error."); + public static final ErrorCode FOCV0002 = new W3CErrorCode("FOCV0002", "Invalid CSV delimiter."); + public static final ErrorCode FOCV0003 = new W3CErrorCode("FOCV0003", "Conflicting CSV delimiters."); public static final ErrorCode FOTY0012 = new W3CErrorCode("FOTY0012", "Argument node does not have a typed value."); public static final ErrorCode FOTY0013 = new W3CErrorCode("FOTY0013", "The argument to fn:data() contains a function item."); @@ -211,11 +214,13 @@ public class ErrorCodes { public static final ErrorCode FTDY0020 = new W3CErrorCode("FTDY0020", ""); public static final ErrorCode FODC0006 = new W3CErrorCode("FODC0006", "String passed to fn:parse-xml is not a well-formed XML document."); + public static final ErrorCode FODC0011 = new W3CErrorCode("FODC0011", "HTML parsing error."); public static final ErrorCode FOAP0001 = new W3CErrorCode("FOAP0001", "Wrong number of arguments"); /* XQuery 3.1 */ public static final ErrorCode XQTY0105 = new W3CErrorCode("XQTY0105", "It is a type error if the content sequence in an element constructor contains a function."); + public static final ErrorCode XQTY0153 = new W3CErrorCode("XQTY0153", "It is a type error if the finally clause of a try/catch expression evaluates to a non-empty sequence."); public static final ErrorCode FOAY0001 = new W3CErrorCode("FOAY0001", "Array index out of bounds."); public static final ErrorCode FOAY0002 = new W3CErrorCode("FOAY0002", "Negative array length."); @@ -241,6 +246,10 @@ public class ErrorCodes { public static final ErrorCode FOXT0004 = new W3CErrorCode("FOXT0004", "XSLT transformation has been disabled"); public static final ErrorCode FOXT0006 = new W3CErrorCode("FOXT0006", "XSLT output contains non-accepted characters"); + // Invisible XML errors + public static final ErrorCode FOIX0001 = new W3CErrorCode("FOIX0001", "Invalid ixml grammar"); + public static final ErrorCode FOIX0002 = new W3CErrorCode("FOIX0002", "ixml parse error"); + public static final ErrorCode XTSE0165 = new W3CErrorCode("XTSE0165","It is a static error if the processor is not able to retrieve the resource identified by the URI reference [ in the href attribute of xsl:include or xsl:import] , or if the resource that is retrieved does not contain a stylesheet module conforming to this specification."); /* eXist specific XQuery and XPath errors diff --git a/exist-core/src/main/java/org/exist/xquery/FLWORClause.java b/exist-core/src/main/java/org/exist/xquery/FLWORClause.java index d56ed4777d2..ea632d51e17 100644 --- a/exist-core/src/main/java/org/exist/xquery/FLWORClause.java +++ b/exist-core/src/main/java/org/exist/xquery/FLWORClause.java @@ -34,7 +34,8 @@ public interface FLWORClause extends Expression { enum ClauseType { - FOR, LET, GROUPBY, ORDERBY, WHERE, SOME, EVERY, COUNT, WINDOW + FOR, LET, GROUPBY, ORDERBY, WHERE, WHILE, SOME, EVERY, COUNT, WINDOW, FOR_MEMBER, FOR_KEY, FOR_VALUE, FOR_KEY_VALUE, + LET_SEQ_DESTRUCTURE, LET_ARRAY_DESTRUCTURE, LET_MAP_DESTRUCTURE } /** diff --git a/exist-core/src/main/java/org/exist/xquery/FilterExprAM.java b/exist-core/src/main/java/org/exist/xquery/FilterExprAM.java new file mode 100644 index 00000000000..f07af305e12 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/FilterExprAM.java @@ -0,0 +1,242 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Implements the XQuery 4.0 array/map filter expression ({@code ?[predicate]}). + * + *

For arrays, iterates over members and keeps those where the predicate + * evaluates to true with the context item set to each member. + * Numeric predicates select by position (1-based).

+ * + *

For maps, iterates over entries and keeps those where the predicate + * evaluates to true with the context item set to + * {@code map { "key": key, "value": value }} for each entry. + * Numeric predicates select by position in insertion order.

+ */ +public class FilterExprAM extends AbstractExpression { + + private Expression contextExpr; + private Expression predicate; + + public FilterExprAM(final XQueryContext context, final Expression contextExpr, final Expression predicate) { + super(context); + this.contextExpr = contextExpr; + this.predicate = predicate; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextExpr.analyze(contextInfo); + final AnalyzeContextInfo predicateInfo = new AnalyzeContextInfo(contextInfo); + predicate.analyze(predicateInfo); + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence input = contextExpr.eval(contextSequence, null); + + if (input.isEmpty()) { + return input; + } + + final Item item = input.itemAt(0); + if (Type.subTypeOf(item.getType(), Type.ARRAY_ITEM)) { + return filterArray((ArrayType) item); + } else if (Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + return filterMap((AbstractMapType) item); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "?[] filter requires an array or map, got " + Type.getTypeName(item.getType())); + } + } + + private ArrayType filterArray(final ArrayType array) throws XPathException { + final int size = array.getSize(); + + // Build a context sequence of all member items for position()/last() + final ValueSequence contextSeq = new ValueSequence(size); + final List members = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + final Sequence member = array.get(i); + members.add(member); + // For context sequence, we need each member as an item. + // If a member is a sequence, wrap it — but for position/last to work + // we need exactly `size` items in the context sequence. + if (member.isEmpty()) { + // Empty sequence member: use empty sequence as placeholder + contextSeq.add(AtomicValue.EMPTY_VALUE); + } else if (member.getItemCount() == 1) { + contextSeq.add(member.itemAt(0)); + } else { + // Multi-item member: use first item as representative for context + contextSeq.add(member.itemAt(0)); + } + } + + final int savedPos = context.getContextPosition(); + final Sequence savedSeq = context.getContextSequence(); + try { + final ArrayType result = new ArrayType(context, new ArrayList<>()); + for (int i = 0; i < size; i++) { + final Sequence member = members.get(i); + context.setContextSequencePosition(i, contextSeq); + + final Sequence predResult = predicate.eval(member, null); + if (isSelected(predResult, i + 1)) { + result.add(member); + } + } + return result; + } finally { + context.setContextSequencePosition(savedPos, savedSeq); + } + } + + private AbstractMapType filterMap(final AbstractMapType map) throws XPathException { + final Sequence keys = map.keys(); + final int size = keys.getItemCount(); + + // Build entry maps and context sequence for position/last + final ValueSequence contextSeq = new ValueSequence(size); + final List keyList = new ArrayList<>(size); + final List entryMaps = new ArrayList<>(size); + + for (final SequenceIterator i = keys.iterate(); i.hasNext(); ) { + final AtomicValue key = (AtomicValue) i.nextItem(); + keyList.add(key); + final Sequence value = map.get(key); + + final MapType entryMap = new MapType(context, null); + entryMap.add(new StringValue(this, "key"), key.toSequence()); + entryMap.add(new StringValue(this, "value"), value); + entryMaps.add(entryMap); + contextSeq.add(entryMap); + } + + final int savedPos = context.getContextPosition(); + final Sequence savedSeq = context.getContextSequence(); + try { + final MapType result = new MapType(context, null); + for (int i = 0; i < size; i++) { + context.setContextSequencePosition(i, contextSeq); + final AbstractMapType entryMap = entryMaps.get(i); + + final Sequence predResult = predicate.eval(entryMap.toSequence(), null); + if (isSelected(predResult, i + 1)) { + result.add(keyList.get(i), map.get(keyList.get(i))); + } + } + return result; + } finally { + context.setContextSequencePosition(savedPos, savedSeq); + } + } + + /** + * Determines whether a member/entry at the given 1-based position is selected + * by the predicate result, following XQ4 array/map filter semantics: + * - If the result is a single numeric value, select if it equals the position. + * - If the result is a multi-item all-numeric sequence, select if any value + * equals the position (XQ4 extension for ?[] filters). + * - If the result is a multi-item sequence mixing numeric and non-numeric, + * raise FORG0006. + * - Otherwise, evaluate effective boolean value. + */ + private boolean isSelected(final Sequence predResult, final int position) throws XPathException { + if (predResult.isEmpty()) { + return false; + } + + // Single numeric value: positional predicate + if (predResult.hasOne() && Type.subTypeOfUnion(predResult.itemAt(0).getType(), Type.NUMERIC)) { + final double pos = ((NumericValue) predResult.itemAt(0)).getDouble(); + return pos == position; + } + + // Multi-item sequence starting with numeric: check all items are numeric + if (predResult.getItemCount() > 1 && + Type.subTypeOfUnion(predResult.itemAt(0).getType(), Type.NUMERIC)) { + for (final SequenceIterator i = predResult.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (!Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { + throw new XPathException((Expression) null, ErrorCodes.FORG0006, + "Mixed numeric and non-numeric values in filter predicate"); + } + final double pos = ((NumericValue) item).getDouble(); + if (pos == position) { + return true; + } + } + return false; + } + + // Boolean predicate + return predResult.effectiveBooleanValue(); + } + + @Override + public int returnsType() { + return Type.ITEM; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.EXACTLY_ONE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + contextExpr.dump(dumper); + dumper.display("?["); + predicate.dump(dumper); + dumper.display("]"); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + contextExpr.resetState(postOptimization); + predicate.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/FocusFunction.java b/exist-core/src/main/java/org/exist/xquery/FocusFunction.java new file mode 100644 index 00000000000..28d930a3102 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/FocusFunction.java @@ -0,0 +1,140 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +import java.util.ArrayDeque; +import java.util.List; + +/** + * Implements XQuery 4.0 focus functions: {@code fn { expr }} and {@code function { expr }}. + * + *

A focus function is an inline function with an implicit single parameter + * of type {@code item()*}. When called, the argument is bound as the context + * item for the body expression.

+ * + *

Formally: {@code fn { EXPR }} is equivalent to + * {@code function($dot as item()*) as item()* { EXPR }} where EXPR is + * evaluated with the context value set to {@code $dot}.

+ */ +public class FocusFunction extends AbstractExpression { + + public static final String FOCUS_PARAM_NAME = ".focus"; + + private final UserDefinedFunction function; + private final ArrayDeque calls = new ArrayDeque<>(); + private AnalyzeContextInfo cachedContextInfo; + + public FocusFunction(final XQueryContext context, final UserDefinedFunction function) { + super(context); + this.function = function; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + cachedContextInfo.addFlag(SINGLE_STEP_EXECUTION); + cachedContextInfo.setParent(this); + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("fn "); + function.dump(dumper); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) + throws XPathException { + final List closureVars = context.getLocalStack(); + + final FunctionCall call = new FocusFunctionCall(context, function); + call.getFunction().setClosureVariables(closureVars); + call.setLocation(function.getLine(), function.getColumn()); + call.analyze(new AnalyzeContextInfo(cachedContextInfo)); + + calls.push(call); + + return new FunctionReference(this, call); + } + + @Override + public int returnsType() { + return Type.FUNCTION; + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + calls.clear(); + function.resetState(postOptimization); + } + + /** + * A specialized FunctionCall that sets the argument as context item + * before evaluating the function body. + */ + public static class FocusFunctionCall extends FunctionCall { + + public FocusFunctionCall(final XQueryContext context, final UserDefinedFunction function) { + super(context, function); + } + + @Override + public Sequence evalFunction(final Sequence contextSequence, final Item contextItem, + final Sequence[] seq, final DocumentSet[] contextDocs) throws XPathException { + // The focus function's single argument becomes the context item + // for the body evaluation. + final Sequence focusArg = (seq != null && seq.length > 0) ? seq[0] : Sequence.EMPTY_SEQUENCE; + + context.stackEnter(this); + final LocalVariable mark = context.markLocalVariables(true); + if (getFunction().getClosureVariables() != null) { + context.restoreStack(getFunction().getClosureVariables()); + } + try { + // Bind the implicit parameter + final UserDefinedFunction func = getFunction(); + if (!func.getParameters().isEmpty()) { + final LocalVariable var = new LocalVariable( + func.getParameters().get(0)); + var.setValue(focusArg); + context.declareVariableBinding(var); + } + + // Evaluate the body with the argument as context + final Expression body = func.getFunctionBody(); + if (focusArg.getItemCount() == 1) { + return body.eval(focusArg, focusArg.itemAt(0)); + } else { + return body.eval(focusArg, null); + } + } finally { + context.popLocalVariables(mark); + context.stackLeave(this); + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ForExpr.java b/exist-core/src/main/java/org/exist/xquery/ForExpr.java index 1a5eab2f4dd..577784de185 100644 --- a/exist-core/src/main/java/org/exist/xquery/ForExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/ForExpr.java @@ -176,15 +176,23 @@ public Sequence eval(Sequence contextSequence, Item contextItem) // Loop through each variable binding int p = 0; - if (in.isEmpty() && allowEmpty) { - processItem(var, AtomicValue.EMPTY_VALUE, Sequence.EMPTY_SEQUENCE, resultSequence, at, p); - } else { - for (final SequenceIterator i = in.iterate(); i.hasNext(); p++) { - processItem(var, i.nextItem(), in, resultSequence, at, p); + try { + if (in.isEmpty() && allowEmpty) { + processItem(var, AtomicValue.EMPTY_VALUE, Sequence.EMPTY_SEQUENCE, resultSequence, at, p); + } else { + for (final SequenceIterator i = in.iterate(); i.hasNext() && !WhileClause.isTerminated(); p++) { + processItem(var, i.nextItem(), in, resultSequence, at, p); + } } + } catch (final WhileClause.WhileTerminationException e) { + // while clause signaled end of iteration for this for loop + } + // clear terminated flag if this is the outermost for + if (isOuterFor && WhileClause.isTerminated()) { + WhileClause.clearTerminated(); } } finally { - // restore the local variable stack + // restore the local variable stack context.popLocalVariables(mark, resultSequence); } diff --git a/exist-core/src/main/java/org/exist/xquery/ForKeyValueExpr.java b/exist-core/src/main/java/org/exist/xquery/ForKeyValueExpr.java new file mode 100644 index 00000000000..6d416b5077e --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ForKeyValueExpr.java @@ -0,0 +1,306 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import java.util.HashSet; +import java.util.Set; + +/** + * Implements the XQuery 4.0 "for key", "for value", and "for key/value" clauses. + * + *

{@code for key $k in map-expr} iterates over the keys of a map.

+ *

{@code for value $v in map-expr} iterates over the values of a map.

+ *

{@code for key $k value $v in map-expr} iterates over key-value pairs.

+ */ +public class ForKeyValueExpr extends BindingExpression { + + private final ClauseType clauseType; + private QName positionalVariable = null; + private QName valueVariable = null; + private SequenceType valueSequenceType = null; + + public ForKeyValueExpr(final XQueryContext context, final ClauseType clauseType) { + super(context); + this.clauseType = clauseType; + } + + public void setPositionalVariable(final QName variable) { + positionalVariable = variable; + } + + public void setValueVariable(final QName variable) { + valueVariable = variable; + } + + public void setValueSequenceType(final SequenceType type) { + valueSequenceType = type; + } + + @Override + public ClauseType getType() { + return clauseType; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + super.analyze(contextInfo); + final LocalVariable mark = context.markLocalVariables(false); + try { + contextInfo.setParent(this); + final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo); + inputSequence.analyze(varContextInfo); + final LocalVariable inVar = new LocalVariable(varName); + inVar.setSequenceType(sequenceType); + inVar.setStaticType(Type.ITEM); + context.declareVariableBinding(inVar); + if (valueVariable != null) { + final LocalVariable valVar = new LocalVariable(valueVariable); + valVar.setSequenceType(valueSequenceType); + valVar.setStaticType(Type.ITEM); + context.declareVariableBinding(valVar); + } + if (positionalVariable != null) { + final LocalVariable posVar = new LocalVariable(positionalVariable); + posVar.setSequenceType(POSITIONAL_VAR_TYPE); + posVar.setStaticType(Type.INTEGER); + context.declareVariableBinding(posVar); + } + + final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); + newContextInfo.addFlag(SINGLE_STEP_EXECUTION); + returnExpr.analyze(newContextInfo); + } finally { + context.popLocalVariables(mark); + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) + throws XPathException { + if (context.getProfiler().isEnabled()) { + context.getProfiler().start(this); + context.getProfiler().message(this, Profiler.DEPENDENCIES, + "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies())); + if (contextSequence != null) { + context.getProfiler().message(this, Profiler.START_SEQUENCES, + "CONTEXT SEQUENCE", contextSequence); + } + } + context.expressionStart(this); + + final LocalVariable mark = context.markLocalVariables(false); + final Sequence resultSequence = new ValueSequence(unordered); + try { + final Sequence in = inputSequence.eval(contextSequence, null); + + if (in.isEmpty()) { + // Empty map produces no iterations + } else if (in.getItemCount() != 1 || !(in.itemAt(0) instanceof AbstractMapType)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "for " + clauseLabel() + + " expression requires a single map, got " + + Type.getTypeName(in.getItemType())); + } else { + final AbstractMapType map = (AbstractMapType) in.itemAt(0); + final LocalVariable var = createVariable(varName); + var.setSequenceType(sequenceType); + context.declareVariableBinding(var); + + LocalVariable valVar = null; + if (valueVariable != null) { + valVar = new LocalVariable(valueVariable); + valVar.setSequenceType(valueSequenceType); + context.declareVariableBinding(valVar); + } + + LocalVariable at = null; + if (positionalVariable != null) { + at = new LocalVariable(positionalVariable); + at.setSequenceType(POSITIONAL_VAR_TYPE); + context.declareVariableBinding(at); + } + + final Sequence keys = map.keys(); + int pos = 0; + try { + for (final SequenceIterator i = keys.iterate(); i.hasNext() && !WhileClause.isTerminated(); ) { + context.proceed(this); + final AtomicValue key = (AtomicValue) i.nextItem(); + pos++; + + final Sequence bindValue; + if (clauseType == ClauseType.FOR_VALUE) { + bindValue = map.get(key); + } else { + // FOR_KEY or FOR_KEY_VALUE: primary var is key + bindValue = key; + } + var.setValue(bindValue); + + if (valVar != null) { + valVar.setValue(map.get(key)); + } + + if (positionalVariable != null) { + at.setValue(new IntegerValue(this, pos)); + } + if (sequenceType != null) { + var.checkType(); + } + if (valVar != null && valueSequenceType != null) { + valVar.checkType(); + } + + final Sequence returnResult; + if (returnExpr instanceof OrderByClause) { + returnResult = returnExpr.eval(bindValue, null); + } else { + returnResult = returnExpr.eval(null, null); + } + resultSequence.addAll(returnResult); + var.destroy(context, resultSequence); + } + } catch (final WhileClause.WhileTerminationException e) { + // while clause signaled end of iteration + } + if (getPreviousClause() == null && WhileClause.isTerminated()) { + WhileClause.clearTerminated(); + } + } + } finally { + context.popLocalVariables(mark, resultSequence); + } + + if (callPostEval()) { + final Sequence postResult = postEval(resultSequence); + context.expressionEnd(this); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", postResult); + } + return postResult; + } + + context.expressionEnd(this); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", resultSequence); + } + return resultSequence; + } + + private String clauseLabel() { + switch (clauseType) { + case FOR_KEY: return "key"; + case FOR_VALUE: return "value"; + case FOR_KEY_VALUE: return "key/value"; + default: return "key"; + } + } + + private boolean callPostEval() { + FLWORClause prev = getPreviousClause(); + while (prev != null) { + switch (prev.getType()) { + case LET: + case FOR: + case FOR_MEMBER: + case FOR_KEY: + case FOR_VALUE: + case FOR_KEY_VALUE: + return false; + case ORDERBY: + case GROUPBY: + return true; + } + prev = prev.getPreviousClause(); + } + return true; + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("for " + clauseLabel() + " ", line); + dumper.startIndent(); + dumper.display("$").display(varName); + if (valueVariable != null) { + dumper.display(" value $").display(valueVariable); + } + if (sequenceType != null) { + dumper.display(" as ").display(sequenceType); + } + dumper.display(" in "); + inputSequence.dump(dumper); + dumper.endIndent().nl(); + if (returnExpr instanceof LetExpr) { + dumper.display(" ", returnExpr.getLine()); + } else { + dumper.display("return", returnExpr.getLine()); + } + dumper.startIndent(); + returnExpr.dump(dumper); + dumper.endIndent().nl(); + } + + @Override + public String toString() { + final StringBuilder result = new StringBuilder(); + result.append("for ").append(clauseLabel()).append(" "); + result.append("$").append(varName); + if (valueVariable != null) { + result.append(" value $").append(valueVariable); + } + if (sequenceType != null) { + result.append(" as ").append(sequenceType); + } + result.append(" in "); + result.append(inputSequence.toString()); + result.append(" "); + if (returnExpr instanceof LetExpr) { + result.append(" "); + } else { + result.append("return "); + } + result.append(returnExpr.toString()); + return result.toString(); + } + + @Override + public Set getTupleStreamVariables() { + final Set variables = new HashSet<>(); + final QName variable = getVariable(); + if (variable != null) { + variables.add(variable); + } + if (valueVariable != null) { + variables.add(valueVariable); + } + final LocalVariable startVar = getStartVariable(); + if (startVar != null) { + variables.add(startVar.getQName()); + } + return variables; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ForMemberExpr.java b/exist-core/src/main/java/org/exist/xquery/ForMemberExpr.java new file mode 100644 index 00000000000..74e3b2c2369 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ForMemberExpr.java @@ -0,0 +1,237 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.value.*; + +import java.util.HashSet; +import java.util.Set; + +/** + * Implements the XQuery 4.0 "for member" clause in FLWOR expressions. + * + *

{@code for member $m in $array-expr} iterates over the members of an array, + * binding each member (which is a sequence) to the variable.

+ */ +public class ForMemberExpr extends BindingExpression { + + private QName positionalVariable = null; + + public ForMemberExpr(final XQueryContext context) { + super(context); + } + + public void setPositionalVariable(final QName variable) { + positionalVariable = variable; + } + + @Override + public ClauseType getType() { + return ClauseType.FOR_MEMBER; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + super.analyze(contextInfo); + final LocalVariable mark = context.markLocalVariables(false); + try { + contextInfo.setParent(this); + final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo); + inputSequence.analyze(varContextInfo); + final LocalVariable inVar = new LocalVariable(varName); + inVar.setSequenceType(sequenceType); + inVar.setStaticType(Type.ITEM); + context.declareVariableBinding(inVar); + if (positionalVariable != null) { + final LocalVariable posVar = new LocalVariable(positionalVariable); + posVar.setSequenceType(POSITIONAL_VAR_TYPE); + posVar.setStaticType(Type.INTEGER); + context.declareVariableBinding(posVar); + } + + final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); + newContextInfo.addFlag(SINGLE_STEP_EXECUTION); + returnExpr.analyze(newContextInfo); + } finally { + context.popLocalVariables(mark); + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) + throws XPathException { + if (context.getProfiler().isEnabled()) { + context.getProfiler().start(this); + context.getProfiler().message(this, Profiler.DEPENDENCIES, + "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies())); + if (contextSequence != null) { + context.getProfiler().message(this, Profiler.START_SEQUENCES, + "CONTEXT SEQUENCE", contextSequence); + } + } + context.expressionStart(this); + + final LocalVariable mark = context.markLocalVariables(false); + final Sequence resultSequence = new ValueSequence(unordered); + try { + final Sequence in = inputSequence.eval(contextSequence, null); + + if (!(in instanceof ArrayType)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "for member expression requires an array, got " + + Type.getTypeName(in.getItemType())); + } + + final ArrayType array = (ArrayType) in; + final LocalVariable var = createVariable(varName); + var.setSequenceType(sequenceType); + context.declareVariableBinding(var); + + LocalVariable at = null; + if (positionalVariable != null) { + at = new LocalVariable(positionalVariable); + at.setSequenceType(POSITIONAL_VAR_TYPE); + context.declareVariableBinding(at); + } + + try { + for (int i = 0; i < array.getSize() && !WhileClause.isTerminated(); i++) { + context.proceed(this); + final Sequence member = array.get(i); + var.setValue(member); + if (positionalVariable != null) { + at.setValue(new IntegerValue(this, i + 1)); + } + if (sequenceType == null) { + var.checkType(); + } + + final Sequence returnResult; + if (returnExpr instanceof OrderByClause) { + returnResult = returnExpr.eval(member, null); + } else { + returnResult = returnExpr.eval(null, null); + } + resultSequence.addAll(returnResult); + var.destroy(context, resultSequence); + } + } catch (final WhileClause.WhileTerminationException e) { + // while clause signaled end of iteration + } + if (getPreviousClause() == null && WhileClause.isTerminated()) { + WhileClause.clearTerminated(); + } + } finally { + context.popLocalVariables(mark, resultSequence); + } + + if (callPostEval()) { + final Sequence postResult = postEval(resultSequence); + context.expressionEnd(this); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", postResult); + } + return postResult; + } + + context.expressionEnd(this); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", resultSequence); + } + return resultSequence; + } + + private boolean callPostEval() { + FLWORClause prev = getPreviousClause(); + while (prev != null) { + switch (prev.getType()) { + case LET: + case FOR: + case FOR_MEMBER: + return false; + case ORDERBY: + case GROUPBY: + return true; + } + prev = prev.getPreviousClause(); + } + return true; + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("for member ", line); + dumper.startIndent(); + dumper.display("$").display(varName); + if (sequenceType != null) { + dumper.display(" as ").display(sequenceType); + } + dumper.display(" in "); + inputSequence.dump(dumper); + dumper.endIndent().nl(); + if (returnExpr instanceof LetExpr) { + dumper.display(" ", returnExpr.getLine()); + } else { + dumper.display("return", returnExpr.getLine()); + } + dumper.startIndent(); + returnExpr.dump(dumper); + dumper.endIndent().nl(); + } + + @Override + public String toString() { + final StringBuilder result = new StringBuilder(); + result.append("for member "); + result.append("$").append(varName); + if (sequenceType != null) { + result.append(" as ").append(sequenceType); + } + result.append(" in "); + result.append(inputSequence.toString()); + result.append(" "); + if (returnExpr instanceof LetExpr) { + result.append(" "); + } else { + result.append("return "); + } + result.append(returnExpr.toString()); + return result.toString(); + } + + @Override + public Set getTupleStreamVariables() { + final Set variables = new HashSet<>(); + final QName variable = getVariable(); + if (variable != null) { + variables.add(variable); + } + final LocalVariable startVar = getStartVariable(); + if (startVar != null) { + variables.add(startVar.getQName()); + } + return variables; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/Function.java b/exist-core/src/main/java/org/exist/xquery/Function.java index 161cba2957b..a22837100ab 100644 --- a/exist-core/src/main/java/org/exist/xquery/Function.java +++ b/exist-core/src/main/java/org/exist/xquery/Function.java @@ -212,10 +212,29 @@ public void setParent(final Expression parent) { * @throws XPathException if an error occurs setting the arguments */ public void setArguments(final List arguments) throws XPathException { - if ((!mySignature.isVariadic()) && arguments.size() != mySignature.getArgumentCount()) { - throw new XPathException(this, ErrorCodes.XPST0017, - "Number of arguments of function " + getName() + " doesn't match function signature (expected " - + mySignature.getArgumentCount() + ", got " + arguments.size() + ')'); + final int argCount = mySignature.getArgumentCount(); + if ((!mySignature.isVariadic()) && arguments.size() != argCount) { + // XQ4: Allow fewer arguments if trailing params have default values + if (arguments.size() < argCount) { + boolean hasDefaults = true; + final SequenceType[] argTypes = mySignature.getArgumentTypes(); + for (int i = arguments.size(); i < argCount; i++) { + if (!(argTypes[i] instanceof FunctionParameterSequenceType) || + !((FunctionParameterSequenceType) argTypes[i]).hasDefaultValue()) { + hasDefaults = false; + break; + } + } + if (!hasDefaults) { + throw new XPathException(this, ErrorCodes.XPST0017, + "Number of arguments of function " + getName() + " doesn't match function signature (expected " + + argCount + ", got " + arguments.size() + ')'); + } + } else { + throw new XPathException(this, ErrorCodes.XPST0017, + "Number of arguments of function " + getName() + " doesn't match function signature (expected " + + argCount + ", got " + arguments.size() + ')'); + } } steps.clear(); diff --git a/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java b/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java index adcf7d3d5cb..07d6a924516 100644 --- a/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java +++ b/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java @@ -54,6 +54,17 @@ public static Expression createFunction(XQueryContext context, XQueryAST ast, Pa } catch(final QName.IllegalQNameException xpe) { throw new XPathException(ast, ErrorCodes.XPST0081, "Invalid qname " + ast.getText() + ". " + xpe.getMessage()); } + // XQ4 (PR2200): for unprefixed function calls, check if there's a + // no-namespace user-defined function that should override fn: + if (context.getXQueryVersion() >= 40 + && !ast.getText().contains(":") + && Namespaces.XPATH_FUNCTIONS_NS.equals(qname.getNamespaceURI())) { + final QName noNsName = new QName(ast.getText(), ""); + final UserDefinedFunction noNsFunc = context.resolveFunction(noNsName, params.size()); + if (noNsFunc != null) { + qname = noNsName; + } + } return createFunction(context, qname, ast, parent, params); } @@ -240,12 +251,25 @@ private static GeneralComparison equals(XQueryContext context, XQueryAST ast, private static CastExpression castExpression(XQueryContext context, XQueryAST ast, List params, QName qname) throws XPathException { - if (params.size() != 1) { + final Expression arg; + if (params.size() == 1) { + arg = params.getFirst(); + } else if (params.isEmpty() && context.getXQueryVersion() >= 31) { + // XQ4 focus constructor: xs:type() uses context item as argument + arg = new ContextItemExpression(context); + ((ContextItemExpression) arg).setLocation(ast.getLine(), ast.getColumn()); + } else { throw new XPathException(ast.getLine(), ast.getColumn(), ErrorCodes.XPST0017, "Wrong number of arguments for constructor function"); } - final Expression arg = params.getFirst(); - final int code = Type.getType(qname); + final int code; + try { + code = Type.getType(qname); + } catch (final XPathException e) { + // Unknown type name in xs: namespace → XPST0017 (no such function) + throw new XPathException(ast.getLine(), ast.getColumn(), + ErrorCodes.XPST0017, "Unknown constructor function: " + qname.getStringValue()); + } final CastExpression castExpr = new CastExpression(context, arg, code, Cardinality.ZERO_OR_ONE); castExpr.setLocation(ast.getLine(), ast.getColumn()); return castExpr; @@ -305,10 +329,34 @@ private static Function functionCall(final XQueryContext context, * @param throwOnNotFound true to throw an XPST0017 if the functions is not found, false to just return null */ private static @Nullable Function getInternalModuleFunction(final XQueryContext context, - final XQueryAST ast, final List params, QName qname, Module module, + final XQueryAST ast, List params, QName qname, Module module, final boolean throwOnNotFound) throws XPathException { //For internal modules: create a new function instance from the class - FunctionDef def = ((InternalModule) module).getFunctionDef(qname, params.size()); + final boolean hasKeywordArgs = hasKeywordArguments(params); + FunctionDef def = null; + + // When keyword args are present, skip the initial arity-based lookup because + // params.size() may not match the correct overload. Instead, resolve keyword + // args against all signatures (largest arity first) to find the right one. + if (hasKeywordArgs) { + final List funcs = ((InternalModule) module).getFunctionsByName(qname); + // Sort by arity descending — keyword args typically target the largest overload + funcs.sort((a, b) -> b.getArgumentCount() - a.getArgumentCount()); + for (final FunctionSignature sig : funcs) { + final List resolved = resolveKeywordArguments(context, params, sig, ast); + if (resolved != null) { + def = ((InternalModule) module).getFunctionDef(qname, sig.getArgumentCount()); + if (def != null) { + params = resolved; + break; + } + } + } + } + + if (def == null && !hasKeywordArgs) { + def = ((InternalModule) module).getFunctionDef(qname, params.size()); + } //TODO: rethink: xsl namespace function should search xpath one too if (def == null && Namespaces.XSL_NS.equals(qname.getNamespaceURI())) { //Search xpath namespace @@ -360,7 +408,12 @@ private static Function functionCall(final XQueryContext context, "Access to deprecated functions is not allowed. Call to '" + qname.getStringValue() + "()' denied. " + def.getSignature().getDeprecated()); } final Function fn = Function.createFunction(context, ast, module, def); - fn.setArguments(params); + if (hasKeywordArgs) { + final List resolved = resolveKeywordArguments(context, params, def.getSignature(), ast); + fn.setArguments(resolved != null ? resolved : params); + } else { + fn.setArguments(params); + } fn.setASTNode(ast); return new InternalFunctionCall(fn); } @@ -370,11 +423,36 @@ private static Function functionCall(final XQueryContext context, */ private static FunctionCall getUserDefinedFunction(XQueryContext context, XQueryAST ast, List params, QName qname) throws XPathException { final FunctionCall fc; - final UserDefinedFunction func = context.resolveFunction(qname, params.size()); + final boolean hasKeywordArgs = hasKeywordArguments(params); + + // Count positional arguments to determine resolution arity + int positionalCount = params.size(); + if (hasKeywordArgs) { + positionalCount = 0; + for (final Expression param : params) { + if (param instanceof KeywordArgumentExpression) { + break; + } + positionalCount++; + } + } + + UserDefinedFunction func = context.resolveFunction(qname, params.size()); + + // If keyword args and no exact match, try resolving with positional count + if (func == null && hasKeywordArgs && positionalCount != params.size()) { + func = context.resolveFunction(qname, positionalCount); + } + if (func != null) { fc = new FunctionCall(context, func); fc.setLocation(ast.getLine(), ast.getColumn()); - fc.setArguments(params); + if (hasKeywordArgs) { + final List resolved = resolveKeywordArguments(context, params, func.getSignature(), ast); + fc.setArguments(resolved != null ? resolved : params); + } else { + fc.setArguments(params); + } } else { //Create a forward reference which will be resolved later fc = new FunctionCall(context, qname, params); @@ -482,4 +560,120 @@ public static FunctionCall wrap(XQueryContext context, Function call) throws XPa wrappedCall.setArguments(wrapperArgs); return wrappedCall; } + + /** + * Check if any parameter is a keyword argument. + */ + private static boolean hasKeywordArguments(final List params) { + for (final Expression param : params) { + if (param instanceof KeywordArgumentExpression) { + return true; + } + } + return false; + } + + /** + * Resolve keyword arguments to positional arguments using the function signature. + * + * Keyword arguments (name := value) are matched to the corresponding parameter + * position in the function signature. Positional arguments must come before + * keyword arguments. Gaps between positional and keyword arguments are filled + * with empty sequence expressions for optional parameters. Returns null if + * resolution fails. + */ + private static @Nullable List resolveKeywordArguments( + final XQueryContext context, + final List params, final FunctionSignature signature, + final XQueryAST ast) throws XPathException { + final SequenceType[] argTypes = signature.getArgumentTypes(); + if (argTypes == null) { + return null; + } + + // Find where keyword arguments start + int firstKeyword = -1; + for (int i = 0; i < params.size(); i++) { + if (params.get(i) instanceof KeywordArgumentExpression) { + firstKeyword = i; + break; + } + } + if (firstKeyword < 0) { + return params; // no keyword args + } + + // Build the resolved argument list + final List resolved = new ArrayList<>(argTypes.length); + + // Copy positional arguments + for (int i = 0; i < firstKeyword; i++) { + resolved.add(params.get(i)); + } + + // Fill remaining positions with nulls (to be filled by keyword args) + for (int i = firstKeyword; i < argTypes.length; i++) { + resolved.add(null); + } + + // Match keyword arguments to parameter positions + for (int i = firstKeyword; i < params.size(); i++) { + final Expression param = params.get(i); + if (!(param instanceof KeywordArgumentExpression)) { + throw new XPathException(ast.getLine(), ast.getColumn(), + ErrorCodes.XPST0003, + "Positional arguments must not follow keyword arguments"); + } + final KeywordArgumentExpression kwArg = (KeywordArgumentExpression) param; + final String kwName = kwArg.getKeywordName(); + + // Find matching parameter by name + int matchPos = -1; + for (int j = firstKeyword; j < argTypes.length; j++) { + if (argTypes[j] instanceof org.exist.xquery.value.FunctionParameterSequenceType) { + final String paramName = ((org.exist.xquery.value.FunctionParameterSequenceType) argTypes[j]) + .getAttributeName(); + if (kwName.equals(paramName)) { + matchPos = j; + break; + } + } + } + + if (matchPos < 0) { + return null; // no matching parameter found — signature mismatch + } + if (resolved.get(matchPos) != null) { + throw new XPathException(ast.getLine(), ast.getColumn(), + ErrorCodes.XPST0003, + "Duplicate keyword argument: " + kwName); + } + resolved.set(matchPos, kwArg.getArgument()); + } + + // Fill gaps: for parameters that allow empty sequences or have defaults, + // supply an empty sequence expression. This enables keyword arguments to + // skip optional positional parameters in overloaded built-in functions. + for (int i = 0; i < resolved.size(); i++) { + if (resolved.get(i) == null) { + if (argTypes[i] instanceof org.exist.xquery.value.FunctionParameterSequenceType) { + final org.exist.xquery.value.FunctionParameterSequenceType pst = + (org.exist.xquery.value.FunctionParameterSequenceType) argTypes[i]; + if (pst.hasDefaultValue()) { + resolved.set(i, pst.getDefaultValue()); + } else if (pst.getCardinality().isSuperCardinalityOrEqualOf( + org.exist.xquery.Cardinality.EMPTY_SEQUENCE)) { + // Parameter allows empty — fill with empty sequence + resolved.set(i, new PathExpr(context)); + } else { + return null; // required parameter missing + } + } else { + return null; // can't determine if parameter is optional + } + } + } + + return resolved; + } } diff --git a/exist-core/src/main/java/org/exist/xquery/KeywordArgumentExpression.java b/exist-core/src/main/java/org/exist/xquery/KeywordArgumentExpression.java new file mode 100644 index 00000000000..6bd237072a9 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/KeywordArgumentExpression.java @@ -0,0 +1,85 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; + +/** + * Wraps a function argument expression with a keyword name for XQuery 4.0 + * keyword argument syntax: {@code fn:slice($input, start := 3)}. + * + *

This is a transient wrapper used during function call construction. + * The keyword name is used to match the argument to the correct parameter + * position in the function signature.

+ */ +public class KeywordArgumentExpression extends AbstractExpression { + + private final String keywordName; + private final Expression argument; + + public KeywordArgumentExpression(final XQueryContext context, final String keywordName, + final Expression argument) { + super(context); + this.keywordName = keywordName; + this.argument = argument; + } + + public String getKeywordName() { + return keywordName; + } + + public Expression getArgument() { + return argument; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) + throws XPathException { + return argument.eval(contextSequence, contextItem); + } + + @Override + public int returnsType() { + return argument.returnsType(); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + argument.analyze(contextInfo); + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display(keywordName); + dumper.display(" := "); + argument.dump(dumper); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + argument.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/LetDestructureExpr.java b/exist-core/src/main/java/org/exist/xquery/LetDestructureExpr.java new file mode 100644 index 00000000000..39e93d9d045 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/LetDestructureExpr.java @@ -0,0 +1,330 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Implements XQuery 4.0 let destructuring: + *
    + *
  • {@code let $($x, $y) := (1, 2)} — sequence destructuring
  • + *
  • {@code let $[$x, $y] := [1, 2]} — array destructuring
  • + *
  • {@code let ${$x, $y} := map{'x':1,'y':2}} — map destructuring
  • + *
+ */ +public class LetDestructureExpr extends AbstractFLWORClause { + + public enum DestructureMode { + SEQUENCE, ARRAY, MAP + } + + private final DestructureMode mode; + private final List varNames; + private final List varTypes; + private Expression inputSequence; + private SequenceType overallType; + + public LetDestructureExpr(final XQueryContext context, final DestructureMode mode) { + super(context); + this.mode = mode; + this.varNames = new ArrayList<>(); + this.varTypes = new ArrayList<>(); + } + + public void addVariable(final QName name, final SequenceType type) { + varNames.add(name); + varTypes.add(type); + } + + public void setInputSequence(final Expression seq) { + this.inputSequence = seq.simplify(); + } + + public void setOverallType(final SequenceType type) { + this.overallType = type; + } + + @Override + public ClauseType getType() { + switch (mode) { + case SEQUENCE: return ClauseType.LET_SEQ_DESTRUCTURE; + case ARRAY: return ClauseType.LET_ARRAY_DESTRUCTURE; + case MAP: return ClauseType.LET_MAP_DESTRUCTURE; + default: return ClauseType.LET; + } + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + final LocalVariable mark = context.markLocalVariables(false); + try { + contextInfo.setParent(this); + final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo); + inputSequence.analyze(varContextInfo); + + for (int i = 0; i < varNames.size(); i++) { + final LocalVariable var = new LocalVariable(varNames.get(i)); + if (varTypes.get(i) != null) { + var.setSequenceType(varTypes.get(i)); + } + context.declareVariableBinding(var); + } + + context.setContextSequencePosition(0, null); + returnExpr.analyze(contextInfo); + } finally { + context.popLocalVariables(mark); + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + context.expressionStart(this); + context.pushDocumentContext(); + try { + final LocalVariable mark = context.markLocalVariables(false); + Sequence resultSequence = null; + try { + final Sequence input = inputSequence.eval(contextSequence, null); + + switch (mode) { + case SEQUENCE: + bindSequenceVars(input); + break; + case ARRAY: + bindArrayVars(input); + break; + case MAP: + bindMapVars(input); + break; + } + + resultSequence = returnExpr.eval(contextSequence, null); + } finally { + context.popLocalVariables(mark, resultSequence); + } + if (resultSequence == null) { + return Sequence.EMPTY_SEQUENCE; + } + if (getPreviousClause() == null) { + resultSequence = postEval(resultSequence); + } + return resultSequence; + } finally { + context.popDocumentContext(); + context.expressionEnd(this); + } + } + + private void bindSequenceVars(final Sequence input) throws XPathException { + for (int i = 0; i < varNames.size(); i++) { + final LocalVariable var = createVariable(varNames.get(i)); + final SequenceType type = varTypes.get(i); + if (type != null) { + var.setSequenceType(type); + } + context.declareVariableBinding(var); + + if (i < input.getItemCount()) { + var.setValue(input.itemAt(i).toSequence()); + } else { + var.setValue(Sequence.EMPTY_SEQUENCE); + } + if (type != null) { + checkVarType(var, type); + } + } + } + + private void bindArrayVars(final Sequence input) throws XPathException { + if (input.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Array destructuring requires an array, got empty sequence"); + } + final Item item = input.itemAt(0); + if (!Type.subTypeOf(item.getType(), Type.ARRAY_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Array destructuring requires an array, got " + + Type.getTypeName(item.getType())); + } + final ArrayType array = (ArrayType) item; + for (int i = 0; i < varNames.size(); i++) { + final LocalVariable var = createVariable(varNames.get(i)); + final SequenceType type = varTypes.get(i); + if (type != null) { + var.setSequenceType(type); + } + context.declareVariableBinding(var); + + if (i < array.getSize()) { + var.setValue(array.get(i)); + } else { + var.setValue(Sequence.EMPTY_SEQUENCE); + } + if (type != null) { + checkVarType(var, type); + } + } + } + + private void bindMapVars(final Sequence input) throws XPathException { + if (input.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Map destructuring requires a map, got empty sequence"); + } + final Item item = input.itemAt(0); + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Map destructuring requires a map, got " + + Type.getTypeName(item.getType())); + } + final AbstractMapType map = (AbstractMapType) item; + for (int i = 0; i < varNames.size(); i++) { + final QName qn = varNames.get(i); + final LocalVariable var = createVariable(qn); + final SequenceType type = varTypes.get(i); + if (type != null) { + var.setSequenceType(type); + } + context.declareVariableBinding(var); + + final Sequence value = map.get(new StringValue(this, qn.getLocalPart())); + if (value != null && !value.isEmpty()) { + var.setValue(value); + } else { + var.setValue(Sequence.EMPTY_SEQUENCE); + } + if (type != null) { + checkVarType(var, type); + } + } + } + + private void checkVarType(final LocalVariable var, final SequenceType type) throws XPathException { + final Sequence val = var.getValue(); + if (val == null) { + return; + } + final Cardinality actualCard; + if (val.isEmpty()) { + actualCard = Cardinality.EMPTY_SEQUENCE; + } else if (val.hasMany()) { + actualCard = Cardinality._MANY; + } else { + actualCard = Cardinality.EXACTLY_ONE; + } + if (!type.getCardinality().isSuperCardinalityOrEqualOf(actualCard)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Invalid cardinality for variable $" + var.getQName() + + ". Expected " + type.getCardinality().getHumanDescription() + + ", got " + actualCard.getHumanDescription(), val); + } + if (!Type.subTypeOf(type.getPrimaryType(), Type.NODE) && + !val.isEmpty() && + !Type.subTypeOf(val.getItemType(), type.getPrimaryType())) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Invalid type for variable $" + var.getQName() + + ". Expected " + Type.getTypeName(type.getPrimaryType()) + + ", got " + Type.getTypeName(val.getItemType()), val); + } + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("let "); + switch (mode) { + case SEQUENCE: dumper.display("$("); break; + case ARRAY: dumper.display("$["); break; + case MAP: dumper.display("${"); break; + } + for (int i = 0; i < varNames.size(); i++) { + if (i > 0) dumper.display(", "); + dumper.display("$").display(varNames.get(i).getLocalPart()); + } + switch (mode) { + case SEQUENCE: dumper.display(")"); break; + case ARRAY: dumper.display("]"); break; + case MAP: dumper.display("}"); break; + } + dumper.display(" := "); + inputSequence.dump(dumper); + dumper.nl().display("return "); + returnExpr.dump(dumper); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("let "); + switch (mode) { + case SEQUENCE: sb.append("$("); break; + case ARRAY: sb.append("$["); break; + case MAP: sb.append("${"); break; + } + for (int i = 0; i < varNames.size(); i++) { + if (i > 0) sb.append(", "); + sb.append("$").append(varNames.get(i).getLocalPart()); + } + switch (mode) { + case SEQUENCE: sb.append(")"); break; + case ARRAY: sb.append("]"); break; + case MAP: sb.append("}"); break; + } + sb.append(" := ").append(inputSequence.toString()); + sb.append(" return ").append(returnExpr.toString()); + return sb.toString(); + } + + @Override + public void accept(final ExpressionVisitor visitor) { + // No specific visitor method for destructure - use default + } + + @Override + public boolean allowMixedNodesInReturn() { + return true; + } + + @Override + public Set getTupleStreamVariables() { + return new HashSet<>(varNames); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + inputSequence.resetState(postOptimization); + } + + @Override + public int getDependencies() { + return Dependency.CONTEXT_SET; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/LetExpr.java b/exist-core/src/main/java/org/exist/xquery/LetExpr.java index 278e7d18295..b18f6d5f257 100644 --- a/exist-core/src/main/java/org/exist/xquery/LetExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/LetExpr.java @@ -108,7 +108,14 @@ public Sequence eval(Sequence contextSequence, Item contextItem) var.setContextDocs(inputSequence.getContextDocSet()); registerUpdateListener(in); - resultSequence = returnExpr.eval(contextSequence, null); + try { + resultSequence = returnExpr.eval(contextSequence, null); + } catch (final WhileClause.WhileTerminationException e) { + resultSequence = Sequence.EMPTY_SEQUENCE; + } + if (getPreviousClause() == null && WhileClause.isTerminated()) { + WhileClause.clearTerminated(); + } if (sequenceType != null) { Cardinality actualCardinality; diff --git a/exist-core/src/main/java/org/exist/xquery/LocationStep.java b/exist-core/src/main/java/org/exist/xquery/LocationStep.java index 624795add20..db87581b741 100644 --- a/exist-core/src/main/java/org/exist/xquery/LocationStep.java +++ b/exist-core/src/main/java/org/exist/xquery/LocationStep.java @@ -443,6 +443,16 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) result = getSiblings(context, contextSequence); break; + case Constants.FOLLOWING_OR_SELF_AXIS: + case Constants.PRECEDING_OR_SELF_AXIS: + result = getOrSelfAxis(context, contextSequence); + break; + + case Constants.FOLLOWING_SIBLING_OR_SELF_AXIS: + case Constants.PRECEDING_SIBLING_OR_SELF_AXIS: + result = getSiblingOrSelfAxis(context, contextSequence); + break; + default: throw new IllegalArgumentException("Unsupported axis specified"); } @@ -1003,6 +1013,93 @@ private Sequence getPrecedingOrFollowing(final XQueryContext context, final Sequ } } + /** + * XQ4: Evaluate following-or-self or preceding-or-self axis. + * Combines self:: with following:: or preceding:: and returns + * results in document order. + */ + private Sequence getOrSelfAxis(final XQueryContext context, final Sequence contextSequence) + throws XPathException { + // Evaluate self:: axis + final int savedAxis = axis; + axis = Constants.SELF_AXIS; + final Sequence selfResult = getSelf(context, contextSequence); + + // Evaluate the base axis (following or preceding) + axis = (savedAxis == Constants.FOLLOWING_OR_SELF_AXIS) + ? Constants.FOLLOWING_AXIS : Constants.PRECEDING_AXIS; + final Sequence baseResult = getPrecedingOrFollowing(context, contextSequence); + + axis = savedAxis; + + // Merge results + if (selfResult.isEmpty()) { + return baseResult; + } + if (baseResult.isEmpty()) { + return selfResult; + } + final ValueSequence combined = new ValueSequence(); + if (savedAxis == Constants.PRECEDING_OR_SELF_AXIS) { + // preceding comes first in document order, then self + combined.addAll(baseResult); + combined.addAll(selfResult); + } else { + // self comes first, then following + combined.addAll(selfResult); + combined.addAll(baseResult); + } + combined.sortInDocumentOrder(); + combined.removeDuplicates(); + return combined; + } + + /** + * XQ4: Evaluate following-sibling-or-self or preceding-sibling-or-self axis. + * Combines self:: with following-sibling:: or preceding-sibling:: and returns + * results in document order. + */ + private Sequence getSiblingOrSelfAxis(final XQueryContext context, final Sequence contextSequence) + throws XPathException { + // Evaluate self:: axis + final int savedAxis = axis; + axis = Constants.SELF_AXIS; + final Sequence selfResult = getSelf(context, contextSequence); + + // Evaluate the base sibling axis — guard against document nodes + // which don't have siblings and cause ArrayIndexOutOfBounds + axis = (savedAxis == Constants.FOLLOWING_SIBLING_OR_SELF_AXIS) + ? Constants.FOLLOWING_SIBLING_AXIS : Constants.PRECEDING_SIBLING_AXIS; + Sequence baseResult; + try { + baseResult = getSiblings(context, contextSequence); + } catch (final ArrayIndexOutOfBoundsException e) { + // Document nodes don't have siblings + baseResult = Sequence.EMPTY_SEQUENCE; + } + + axis = savedAxis; + + // Merge results + if (selfResult.isEmpty()) { + return baseResult; + } + if (baseResult.isEmpty()) { + return selfResult; + } + final ValueSequence combined = new ValueSequence(); + if (savedAxis == Constants.PRECEDING_SIBLING_OR_SELF_AXIS) { + combined.addAll(baseResult); + combined.addAll(selfResult); + } else { + combined.addAll(selfResult); + combined.addAll(baseResult); + } + combined.sortInDocumentOrder(); + combined.removeDuplicates(); + return combined; + } + /** * If the optimizer has determined that the first filter after this step is a simple positional * predicate and can be optimized, try to precompute the position and return it to limit the diff --git a/exist-core/src/main/java/org/exist/xquery/MappingArrowOperator.java b/exist-core/src/main/java/org/exist/xquery/MappingArrowOperator.java new file mode 100644 index 00000000000..7390c425604 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/MappingArrowOperator.java @@ -0,0 +1,205 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.dom.QName.IllegalQNameException; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements the XQuery 4.0 mapping arrow operator (=!>). + * + * Unlike the fat arrow (=>), which passes the entire left-hand sequence + * as the first argument, the mapping arrow iterates over each item in + * the sequence and passes each one individually, concatenating the results. + * + * {@code (1, 2, 3) =!> string()} is equivalent to {@code (1, 2, 3) ! string(.)}. + */ +public class MappingArrowOperator extends AbstractExpression { + + private QName qname = null; + private Expression leftExpr; + private FunctionCall fcall = null; + private Expression funcSpec = null; + private List parameters; + private AnalyzeContextInfo cachedContextInfo; + + public MappingArrowOperator(final XQueryContext context, final Expression leftExpr) throws XPathException { + super(context); + this.leftExpr = leftExpr; + } + + public void setArrowFunction(final String fname, final List params) throws XPathException { + try { + this.qname = QName.parse(context, fname, context.getDefaultFunctionNamespace()); + this.parameters = params; + } catch (final IllegalQNameException e) { + throw new XPathException(this, ErrorCodes.XPST0081, "No namespace defined for prefix " + fname); + } + } + + public void setArrowFunction(final PathExpr funcSpec, final List params) { + this.funcSpec = funcSpec.simplify(); + this.parameters = params; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + if (qname != null) { + fcall = NamedFunctionReference.lookupFunction(this, context, qname, parameters.size() + 1); + } + this.cachedContextInfo = contextInfo; + leftExpr.analyze(contextInfo); + if (fcall != null) { + fcall.analyze(contextInfo); + } + if (funcSpec != null) { + funcSpec.analyze(contextInfo); + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence inputSeq = leftExpr.eval(contextSequence, null); + + if (inputSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final ValueSequence result = new ValueSequence(); + for (int i = 0; i < inputSeq.getItemCount(); i++) { + final Item item = inputSeq.itemAt(i); + final Sequence itemSeq = item.toSequence(); + + final FunctionReference fref; + if (fcall != null) { + fref = new FunctionReference(this, fcall); + } else { + final Sequence funcSeq = funcSpec.eval(itemSeq, null); + if (funcSeq.getCardinality() != Cardinality.EXACTLY_ONE) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Expected exactly one item for the function to be called, got " + funcSeq.getItemCount() + + ". Expression: " + ExpressionDumper.dump(funcSpec)); + } + final Item item0 = funcSeq.itemAt(0); + if (!Type.subTypeOf(item0.getType(), Type.FUNCTION)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Type error: expected function, got " + Type.getTypeName(item0.getType())); + } + fref = (FunctionReference) item0; + } + try { + final List fparams = new ArrayList<>(parameters.size() + 1); + fparams.add(new ContextParam(context, itemSeq)); + fparams.addAll(parameters); + + fref.setArguments(fparams); + fref.analyze(new AnalyzeContextInfo(cachedContextInfo)); + result.addAll(fref.eval(null)); + } finally { + fref.close(); + } + } + return result; + } + + @Override + public int returnsType() { + return fcall == null ? Type.ITEM : fcall.returnsType(); + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_MORE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + leftExpr.dump(dumper); + dumper.display(" =!> "); + if (fcall != null) { + dumper.display(fcall.getFunction().getName()).display('('); + } else { + funcSpec.dump(dumper); + } + for (int i = 0; i < parameters.size(); i++) { + if (i > 0) { + dumper.display(", "); + parameters.get(i).dump(dumper); + } + } + dumper.display(')'); + } + + @Override + public void resetState(boolean postOptimization) { + super.resetState(postOptimization); + leftExpr.resetState(postOptimization); + if (fcall != null) { + fcall.resetState(postOptimization); + } + if (funcSpec != null) { + funcSpec.resetState(postOptimization); + } + for (Expression param : parameters) { + param.resetState(postOptimization); + } + } + + private class ContextParam extends Function.Placeholder { + private final Sequence sequence; + + ContextParam(XQueryContext context, Sequence sequence) { + super(context); + this.sequence = sequence; + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + } + + @Override + public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { + return sequence; + } + + @Override + public int returnsType() { + return sequence.getItemType(); + } + + @Override + public void dump(ExpressionDumper dumper) { + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/MethodCallOperator.java b/exist-core/src/main/java/org/exist/xquery/MethodCallOperator.java new file mode 100644 index 00000000000..0cde3871151 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/MethodCallOperator.java @@ -0,0 +1,209 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements the XQuery 4.0 method call operator (=?>). + * + * {@code $map =?> method(args)} looks up the key "method" in the map, + * retrieves the function stored there, and calls it with the map as + * the first argument followed by any additional arguments. + * + * For each item in the left-hand sequence: + *
    + *
  1. The item must be a map (XPTY0004 otherwise)
  2. + *
  3. The method name is looked up as a key in the map
  4. + *
  5. The value must be exactly one function (XPTY0004 otherwise)
  6. + *
  7. The function is called with the map as first argument + additional args
  8. + *
+ * + * Like the mapping arrow (=!>), it processes each item individually + * and concatenates results. + */ +public class MethodCallOperator extends AbstractExpression { + + private Expression leftExpr; + private String methodName; + private List parameters; + private AnalyzeContextInfo cachedContextInfo; + + public MethodCallOperator(final XQueryContext context, final Expression leftExpr) throws XPathException { + super(context); + this.leftExpr = leftExpr; + } + + public void setMethod(final String methodName, final List params) { + this.methodName = methodName; + this.parameters = params; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + this.cachedContextInfo = contextInfo; + leftExpr.analyze(contextInfo); + if (parameters != null) { + for (final Expression param : parameters) { + param.analyze(contextInfo); + } + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence inputSeq = leftExpr.eval(contextSequence, null); + + if (inputSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final ValueSequence result = new ValueSequence(); + for (int i = 0; i < inputSeq.getItemCount(); i++) { + final Item item = inputSeq.itemAt(i); + + // The item must be a map + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method call operator (=?>) requires a map, got " + + Type.getTypeName(item.getType())); + } + + final AbstractMapType map = (AbstractMapType) item; + + // Look up the method name as a key in the map + final Sequence methodValue = map.get(new StringValue(this, methodName)); + if (methodValue == null || methodValue.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' not found in map"); + } + + if (methodValue.getItemCount() != 1) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' must be a single function, got " + + methodValue.getItemCount() + " items"); + } + + final Item methodItem = methodValue.itemAt(0); + if (!Type.subTypeOf(methodItem.getType(), Type.FUNCTION)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' is not a function, got " + + Type.getTypeName(methodItem.getType())); + } + + final FunctionReference fref = (FunctionReference) methodItem; + + // Check arity: function must accept at least 1 argument (the map itself) + final int expectedArity = (parameters != null ? parameters.size() : 0) + 1; + if (fref.getSignature().getArgumentCount() == 0) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' has arity 0 and cannot accept the map as first argument"); + } + + try { + final List fparams = new ArrayList<>(expectedArity); + fparams.add(new ContextParam(context, item.toSequence())); + if (parameters != null) { + fparams.addAll(parameters); + } + + fref.setArguments(fparams); + fref.analyze(new AnalyzeContextInfo(cachedContextInfo)); + result.addAll(fref.eval(null)); + } finally { + fref.close(); + } + } + return result; + } + + @Override + public int returnsType() { + return Type.ITEM; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_MORE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + leftExpr.dump(dumper); + dumper.display(" =?> ").display(methodName).display('('); + if (parameters != null) { + for (int i = 0; i < parameters.size(); i++) { + if (i > 0) { + dumper.display(", "); + } + parameters.get(i).dump(dumper); + } + } + dumper.display(')'); + } + + @Override + public void resetState(boolean postOptimization) { + super.resetState(postOptimization); + leftExpr.resetState(postOptimization); + if (parameters != null) { + for (Expression param : parameters) { + param.resetState(postOptimization); + } + } + } + + private class ContextParam extends Function.Placeholder { + private final Sequence sequence; + + ContextParam(XQueryContext context, Sequence sequence) { + super(context); + this.sequence = sequence; + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + } + + @Override + public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { + return sequence; + } + + @Override + public int returnsType() { + return sequence.getItemType(); + } + + @Override + public void dump(ExpressionDumper dumper) { + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/Option.java b/exist-core/src/main/java/org/exist/xquery/Option.java index 27f8615dfdb..32c38e67dd7 100644 --- a/exist-core/src/main/java/org/exist/xquery/Option.java +++ b/exist-core/src/main/java/org/exist/xquery/Option.java @@ -60,7 +60,9 @@ public Option(QName qname, String contents) throws XPathException { } public Option(final Expression expression, QName qname, String contents) throws XPathException { - if (qname.getPrefix() == null || qname.getPrefix().isEmpty()) + // Options must be in a namespace: either via prefix or via URIQualifiedName Q{uri}local + if ((qname.getPrefix() == null || qname.getPrefix().isEmpty()) + && (qname.getNamespaceURI() == null || qname.getNamespaceURI().isEmpty())) {throw new XPathException(expression, "XPST0081: options must have a prefix");} this.qname = qname; this.contents = contents; diff --git a/exist-core/src/main/java/org/exist/xquery/OtherwiseExpression.java b/exist-core/src/main/java/org/exist/xquery/OtherwiseExpression.java new file mode 100644 index 00000000000..760ab147c54 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/OtherwiseExpression.java @@ -0,0 +1,90 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Item; + +/** + * Implements the XQuery 4.0 "otherwise" operator. + * + * {@code E1 otherwise E2} returns E1 if it is non-empty, otherwise E2. + */ +public class OtherwiseExpression extends AbstractExpression { + + private Expression left; + private Expression right; + + public OtherwiseExpression(final XQueryContext context, final Expression left, final Expression right) { + super(context); + this.left = left; + this.right = right; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + left.analyze(new AnalyzeContextInfo(contextInfo)); + right.analyze(new AnalyzeContextInfo(contextInfo)); + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence leftResult = left.eval(contextSequence, null); + if (leftResult != null && !leftResult.isEmpty()) { + return leftResult; + } + return right.eval(contextSequence, null); + } + + @Override + public int returnsType() { + return left.returnsType(); + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_MORE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + left.dump(dumper); + dumper.display(" otherwise "); + right.dump(dumper); + } + + @Override + public String toString() { + return left.toString() + " otherwise " + right.toString(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + left.resetState(postOptimization); + right.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/PipelineExpression.java b/exist-core/src/main/java/org/exist/xquery/PipelineExpression.java new file mode 100644 index 00000000000..5c746c1127f --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/PipelineExpression.java @@ -0,0 +1,106 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements the XQuery 4.0 pipeline operator "->". + * + * The expression {@code E1 -> E2} evaluates E1, then evaluates E2 with the + * result of E1 as the context value, position 1, and last 1. + */ +public class PipelineExpression extends AbstractExpression { + + private Expression left; + private Expression right; + + public PipelineExpression(final XQueryContext context, final Expression left, final Expression right) { + super(context); + this.left = left; + this.right = right; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + left.analyze(new AnalyzeContextInfo(contextInfo)); + right.analyze(new AnalyzeContextInfo(contextInfo)); + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence leftResult = left.eval(contextSequence, null); + + // Pipeline: set context position=0 (position()=1) and a single-item + // context sequence so last()=1, per XQ4 spec. + final Sequence singletonContext; + if (leftResult.isEmpty()) { + singletonContext = Sequence.EMPTY_SEQUENCE; + } else { + singletonContext = new ValueSequence(1); + singletonContext.add(leftResult.itemAt(0)); + } + final int savedPos = context.getContextPosition(); + final Sequence savedSeq = context.getContextSequence(); + context.setContextSequencePosition(0, singletonContext); + try { + return right.eval(leftResult, null); + } finally { + context.setContextSequencePosition(savedPos, savedSeq); + } + } + + @Override + public int returnsType() { + return right.returnsType(); + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_MORE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + left.dump(dumper); + dumper.display(" -> "); + right.dump(dumper); + } + + @Override + public String toString() { + return left.toString() + " -> " + right.toString(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + left.resetState(postOptimization); + right.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/RangeSequence.java b/exist-core/src/main/java/org/exist/xquery/RangeSequence.java index c23c663067e..eb3ecfa6507 100644 --- a/exist-core/src/main/java/org/exist/xquery/RangeSequence.java +++ b/exist-core/src/main/java/org/exist/xquery/RangeSequence.java @@ -21,8 +21,6 @@ */ package org.exist.xquery; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.exist.dom.persistent.NodeSet; import org.exist.xquery.value.AbstractSequence; import org.exist.xquery.value.IntegerValue; @@ -32,18 +30,40 @@ import org.exist.xquery.value.SequenceIterator; import org.exist.xquery.value.Type; -import java.math.BigInteger; - +/** + * An immutable, lazy sequence representing an integer range (start to end). + * Stores only the start and end values as primitive longs — no intermediate + * IntegerValue objects are created until accessed. Operations like count(), + * isEmpty(), itemAt(), and subsequence() are O(1). + */ public class RangeSequence extends AbstractSequence { - private final static Logger LOG = LogManager.getLogger(AbstractSequence.class); - - private final IntegerValue start; - private final IntegerValue end; + private final long start; + private final long end; + private final long size; public RangeSequence(final IntegerValue start, final IntegerValue end) { + this(start.getLong(), end.getLong()); + } + + public RangeSequence(final long start, final long end) { this.start = start; this.end = end; + if (start <= end) { + final long diff = end - start; + // Overflow protection: if diff < 0, the range is too large + this.size = (diff >= 0) ? diff + 1 : Long.MAX_VALUE; + } else { + this.size = 0; + } + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; } @Override @@ -62,16 +82,16 @@ public int getItemType() { @Override public SequenceIterator iterate() { - return new RangeSequenceIterator(start.getLong(), end.getLong()); + return new RangeSequenceIterator(start, end); } @Override public SequenceIterator unorderedIterator() { - return new RangeSequenceIterator(start.getLong(), end.getLong()); + return new RangeSequenceIterator(start, end); } public SequenceIterator iterateInReverse() { - return new ReverseRangeSequenceIterator(start.getLong(), end.getLong()); + return new ReverseRangeSequenceIterator(start, end); } private static class RangeSequenceIterator implements SequenceIterator { @@ -148,39 +168,30 @@ public long skip(final long n) { @Override public long getItemCountLong() { - if (start.compareTo(end) > 0) { - return 0; - } - try { - return ((IntegerValue) end.minus(start)).getLong() + 1; - } catch (final XPathException e) { - LOG.warn("Unexpected exception when processing result of range expression: {}", e.getMessage(), e); - return 0; - } + return size; } @Override public boolean isEmpty() { - return getItemCountLong() == 0; + return size == 0; } @Override public boolean hasOne() { - return getItemCountLong() == 1; + return size == 1; } @Override public boolean hasMany() { - return getItemCountLong() > 1; + return size > 1; } @Override public Cardinality getCardinality() { - final long itemCount = getItemCountLong(); - if (itemCount <= 0) { + if (size == 0) { return Cardinality.EMPTY_SEQUENCE; } - if (itemCount == 1) { + if (size == 1) { return Cardinality.EXACTLY_ONE; } return Cardinality._MANY; @@ -188,12 +199,26 @@ public Cardinality getCardinality() { @Override public Item itemAt(final int pos) { - if (pos < getItemCountLong()) { - return new IntegerValue(start.getLong() + pos); + if (pos >= 0 && pos < size) { + return new IntegerValue(start + pos); } return null; } + @Override + public boolean contains(final Item item) { + if (item instanceof IntegerValue) { + final long val = ((IntegerValue) item).getLong(); + return val >= start && val <= end; + } + return false; + } + + @Override + public boolean containsReference(final Item item) { + return false; // primitives don't have reference identity + } + @Override public NodeSet toNodeSet() throws XPathException { throw new XPathException(this, "Type error: the sequence cannot be converted into" + @@ -211,37 +236,7 @@ public void removeDuplicates() { } @Override - public boolean containsReference(final Item item) { - return start == item || end == item; - } - - @Override - public boolean contains(final Item item) { - if (item instanceof IntegerValue) { - try { - final BigInteger other = item.toJavaObject(BigInteger.class); - return other.compareTo(start.toJavaObject(BigInteger.class)) >= 0 - && other.compareTo(end.toJavaObject(BigInteger.class)) <= 0; - } catch (final XPathException e) { - LOG.warn(e.getMessage(), e); - return false; - } - } - return false; + public String toString() { + return "Range(" + start + " to " + end + ")"; } - - /** - * Generates a string representation of the Range Sequence. - * - * Range sequences can potentially be - * very large, so we generate a summary here - * rather than evaluating to generate a (possibly) - * huge sequence of objects. - * - * @return a string representation of the range sequence. - */ - @Override - public String toString() { - return "Range(" + start + " to " + end + ")"; - } } diff --git a/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java b/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java index 682be4dfff1..36494f688cc 100644 --- a/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java +++ b/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java @@ -30,19 +30,19 @@ public StaticXQueryException(String message) { } public StaticXQueryException(final Expression expression, String message) { - super(expression, message); + super(expression, ErrorCodes.XPST0003, message); } public StaticXQueryException(int line, int column, String message) { - super(line, column, message); + super(line, column, ErrorCodes.XPST0003, message); } - + public StaticXQueryException(Throwable cause) { this((Expression) null, cause); } - + public StaticXQueryException(final Expression expression, Throwable cause) { - super(expression, cause); + super(expression, ErrorCodes.XPST0003, cause.getMessage(), cause); } public StaticXQueryException(String message, Throwable cause) { @@ -50,11 +50,20 @@ public StaticXQueryException(String message, Throwable cause) { } public StaticXQueryException(final Expression expression, String message, Throwable cause) { - super(expression, message, cause); + super(expression, ErrorCodes.XPST0003, message, cause); } - //TODO add in ErrorCode and ErrorVal public StaticXQueryException(int line, int column, String message, Throwable cause) { - super(line, column, message, cause); + super(line, column, ErrorCodes.XPST0003, message); + initCause(cause); + } + + public StaticXQueryException(int line, int column, ErrorCodes.ErrorCode errorCode, String message) { + super(line, column, errorCode, message); + } + + public StaticXQueryException(int line, int column, ErrorCodes.ErrorCode errorCode, String message, Throwable cause) { + super(line, column, errorCode, message); + initCause(cause); } } \ No newline at end of file diff --git a/exist-core/src/main/java/org/exist/xquery/StringConstructor.java b/exist-core/src/main/java/org/exist/xquery/StringConstructor.java index 3d725e63c66..ba3b0fce492 100644 --- a/exist-core/src/main/java/org/exist/xquery/StringConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/StringConstructor.java @@ -159,9 +159,13 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException public String eval(final Sequence contextSequence) throws XPathException { final Sequence result = expression.eval(contextSequence, null); + // Atomize the result per spec: string constructor interpolation + // atomizes its content, joining with spaces + final Sequence atomized = Atomize.atomize(result); + final StringBuilder out = new StringBuilder(); boolean gotOne = false; - for(final SequenceIterator i = result.iterate(); i.hasNext(); ) { + for(final SequenceIterator i = atomized.iterate(); i.hasNext(); ) { final Item next = i.nextItem(); if (gotOne) { out.append(' '); diff --git a/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java b/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java index d75361bf784..70e263539cf 100644 --- a/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java @@ -56,11 +56,20 @@ public Case(List caseOperands, Expression caseClause) { private Expression operand; private Case defaultClause = null; private List cases = new ArrayList<>(5); - + private boolean booleanMode = false; + public SwitchExpression(XQueryContext context, Expression operand) { super(context); this.operand = operand; } + + /** + * Set boolean mode for XQ4 omitted comparand: switch () { case boolExpr return ... } + * In boolean mode, each case operand is evaluated and its effective boolean value determines the match. + */ + public void setBooleanMode(boolean booleanMode) { + this.booleanMode = booleanMode; + } /** * Add case clause(s) with a return. @@ -88,34 +97,58 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc if (contextItem != null) {contextSequence = contextItem.toSequence();} + + if (booleanMode) { + // XQ4 omitted comparand: evaluate each case operand as boolean + return evalBooleanMode(contextSequence, contextItem); + } + final Sequence opSeq = operand.eval(contextSequence, null); - Sequence result = null; + if (opSeq.hasMany()) { + throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch operand ", opSeq); + } + final Collator defaultCollator = context.getDefaultCollator(); if (opSeq.isEmpty()) { - result = defaultClause.returnClause.eval(contextSequence, null); + // XQ4: empty comparand can match case () (empty case operand) + for (final Case next : cases) { + for (final Expression caseOperand : next.operands) { + final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); + if (caseSeq.isEmpty()) { + return next.returnClause.eval(contextSequence, null); + } + } + } } else { - if (opSeq.hasMany()) { - throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch operand ", opSeq); + final AtomicValue opVal = opSeq.itemAt(0).atomize(); + for (final Case next : cases) { + for (final Expression caseOperand : next.operands) { + final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); + if (context.getXQueryVersion() <= 30 && caseSeq.hasMany()) { + throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch case operand ", caseSeq); + } + // XQ4: case operand may be a sequence; match if any item equals the comparand + for (int i = 0; i < caseSeq.getItemCount(); i++) { + final AtomicValue caseVal = caseSeq.itemAt(i).atomize(); + if (FunDeepEqual.deepEquals(caseVal, opVal, defaultCollator)) { + return next.returnClause.eval(contextSequence, null); + } + } + } } - final AtomicValue opVal = opSeq.itemAt(0).atomize(); - final Collator defaultCollator = context.getDefaultCollator(); - for (final Case next : cases) { - for (final Expression caseOperand : next.operands) { - final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); - if (caseSeq.hasMany()) { - throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch case operand ", caseSeq); - } - final AtomicValue caseVal = caseSeq.isEmpty() ? AtomicValue.EMPTY_VALUE : caseSeq.itemAt(0).atomize(); - if (FunDeepEqual.deepEquals(caseVal, opVal, defaultCollator)) { - return next.returnClause.eval(contextSequence, null); - } - } - } } - if (result == null) { - result = defaultClause.returnClause.eval(contextSequence, null); + return defaultClause.returnClause.eval(contextSequence, null); + } + + private Sequence evalBooleanMode(Sequence contextSequence, Item contextItem) throws XPathException { + for (final Case next : cases) { + for (final Expression caseOperand : next.operands) { + final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); + if (caseSeq.effectiveBooleanValue()) { + return next.returnClause.eval(contextSequence, null); + } + } } - - return result; + return defaultClause.returnClause.eval(contextSequence, null); } public int returnsType() { diff --git a/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java b/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java index ab90c1245a4..3cf503b72e1 100644 --- a/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java @@ -63,7 +63,7 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { expression = new DynamicCardinalityCheck(context, type.getCardinality(), expression, new Error("XPDY0050", type.toString())); - expression = new DynamicTypeCheck(context, type.getPrimaryType(), expression); + expression = new DynamicTypeCheck(context, type.getPrimaryType(), expression, ErrorCodes.XPDY0050); } public void dump(ExpressionDumper dumper) { diff --git a/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java b/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java index c11a2acf065..0712770b636 100644 --- a/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java @@ -63,6 +63,7 @@ public class TryCatchExpression extends AbstractExpression { private final Expression tryTargetExpr; private final List catchClauses = new ArrayList<>(); + private Expression finallyExpr; /** * Constructor. @@ -88,6 +89,10 @@ public void addCatchClause(final List catchErrorList, final List c catchClauses.add( new CatchClause(catchErrorList, catchVars, catchExpr) ); } + public void setFinallyExpr(final Expression finallyExpr) { + this.finallyExpr = finallyExpr; + } + @Override public int getDependencies() { return Dependency.CONTEXT_SET | Dependency.CONTEXT_ITEM; @@ -126,6 +131,9 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException for (final CatchClause catchClause : catchClauses) { catchClause.getCatchExpr().analyze(contextInfo); } + if (finallyExpr != null) { + finallyExpr.analyze(contextInfo); + } } finally { // restore the local variable stack context.popLocalVariables(mark); @@ -141,107 +149,136 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr throw new XPathException(this, ErrorCodes.EXXQDY0003, "The try-catch expression is only available in xquery version \"3.0\" and later."); } + Sequence result = null; + Throwable pendingError = null; + try { // Evaluate 'try' expression - final Sequence tryTargetSeq = tryTargetExpr.eval(contextSequence, contextItem); - return tryTargetSeq; + result = tryTargetExpr.eval(contextSequence, contextItem); - } catch (final Throwable throwable) { + } catch (final Throwable throwable) { - final ErrorCode errorCode; + // If no catch clauses (try/finally only), re-throw after finally + if (catchClauses.isEmpty()) { + pendingError = throwable; + } else { - // fn:error throws an XPathException - if(throwable instanceof XPathException xpe){ - // Get errorcode from nicely thrown xpathexception + final ErrorCode errorCode; - if(xpe.getErrorCode() != null) { - if(xpe.getErrorCode() == ErrorCodes.ERROR) { - errorCode = extractErrorCode(xpe); + // fn:error throws an XPathException + if (throwable instanceof XPathException xpe) { + // Get errorcode from nicely thrown xpathexception + + if (xpe.getErrorCode() != null) { + if (xpe.getErrorCode() == ErrorCodes.ERROR) { + errorCode = extractErrorCode(xpe); + } else { + errorCode = xpe.getErrorCode(); + } } else { - errorCode = xpe.getErrorCode(); + // if no errorcode is found, reconstruct by parsing the error text. + errorCode = extractErrorCode(xpe); } } else { - // if no errorcode is found, reconstruct by parsing the error text. - errorCode = extractErrorCode(xpe); + // Get errorcode from all other errors and exceptions + errorCode = new JavaErrorCode(throwable); } - } else { - // Get errorcode from all other errors and exceptions - errorCode = new JavaErrorCode(throwable); - } - // We need the qname in the end - final QName errorCodeQname = errorCode.getErrorQName(); - - // Exception in thrown, catch expression will be evaluated. - // catchvars (CatchErrorCode (, CatchErrorDesc (, CatchErrorVal)?)? ) - // need to be retrieved as variables - Sequence catchResultSeq = null; - final LocalVariable mark0 = context.markLocalVariables(false); // DWES: what does this do? - - // DWES: should I use popLocalVariables - context.declareInScopeNamespace(Namespaces.W3C_XQUERY_XPATH_ERROR_PREFIX, Namespaces.W3C_XQUERY_XPATH_ERROR_NS); - context.declareInScopeNamespace(Namespaces.EXIST_XQUERY_XPATH_ERROR_PREFIX, Namespaces.EXIST_XQUERY_XPATH_ERROR_NS); - - //context.declareInScopeNamespace(null, null); - - try { - // flag used to escape loop when errorcode has matched - boolean errorMatched = false; - - // Iterate on all catch clauses - for (final CatchClause catchClause : catchClauses) { - - if (isErrorInList(errorCodeQname, catchClause.getCatchErrorList()) && !errorMatched) { - - errorMatched = true; - - // Get catch variables - final LocalVariable mark1 = context.markLocalVariables(false); // DWES: what does this do? - - try { - // Add std errors - addErrCode(errorCodeQname); - addErrDescription(throwable, errorCode); - addErrValue(throwable); - addErrModule(throwable); - addErrLineNumber(throwable); - addErrColumnNumber(throwable); - addErrAdditional(throwable); - addFunctionTrace(throwable); - addJavaTrace(throwable); - - // Evaluate catch expression - catchResultSeq = ((Expression) catchClause.getCatchExpr()).eval(contextSequence, contextItem); - - - } finally { - context.popLocalVariables(mark1, catchResultSeq); + // We need the qname in the end + final QName errorCodeQname = errorCode.getErrorQName(); + + // Exception in thrown, catch expression will be evaluated. + // catchvars (CatchErrorCode (, CatchErrorDesc (, CatchErrorVal)?)? ) + // need to be retrieved as variables + Sequence catchResultSeq = null; + final LocalVariable mark0 = context.markLocalVariables(false); + + context.declareInScopeNamespace(Namespaces.W3C_XQUERY_XPATH_ERROR_PREFIX, Namespaces.W3C_XQUERY_XPATH_ERROR_NS); + context.declareInScopeNamespace(Namespaces.EXIST_XQUERY_XPATH_ERROR_PREFIX, Namespaces.EXIST_XQUERY_XPATH_ERROR_NS); + + try { + // flag used to escape loop when errorcode has matched + boolean errorMatched = false; + + // Iterate on all catch clauses + for (final CatchClause catchClause : catchClauses) { + + if (isErrorInList(errorCodeQname, catchClause.getCatchErrorList()) && !errorMatched) { + + errorMatched = true; + + // Get catch variables + final LocalVariable mark1 = context.markLocalVariables(false); + + try { + // Add std errors + addErrCode(errorCodeQname); + addErrDescription(throwable, errorCode); + addErrValue(throwable); + addErrModule(throwable); + addErrLineNumber(throwable); + addErrColumnNumber(throwable); + addErrAdditional(throwable); + addFunctionTrace(throwable); + addJavaTrace(throwable); + + // Evaluate catch expression + catchResultSeq = ((Expression) catchClause.getCatchExpr()).eval(contextSequence, contextItem); + + + } finally { + context.popLocalVariables(mark1, catchResultSeq); + } + + } else { + // if in the end nothing is set, rethrow after loop } + } // for catch clauses + // If an error hasn't been caught, store for re-throw after finally + if (!errorMatched) { + pendingError = throwable; } else { - // if in the end nothing is set, rethrow after loop + result = catchResultSeq; } - } // for catch clauses - // If an error hasn't been caught, throw new one - if (!errorMatched) { - if (throwable instanceof XPathException) { - throw throwable; - } else { - LOG.error(throwable); - throw new XPathException(this, throwable); + } finally { + context.popLocalVariables(mark0, catchResultSeq); + } + } + } finally { + // XQ4: Evaluate finally clause (always, even if try/catch succeeded or failed) + if (finallyExpr != null) { + try { + final Sequence finallyResult = finallyExpr.eval(contextSequence, contextItem); + // If finally produces a non-empty sequence, raise XQTY0153 + if (finallyResult != null && !finallyResult.isEmpty()) { + throw new XPathException(this, ErrorCodes.XQTY0153, + "The finally clause must evaluate to an empty sequence, got " + + finallyResult.getItemCount() + " item(s)"); } + } catch (final XPathException finallyError) { + // Finally error replaces any pending error or result + context.expressionEnd(this); + throw finallyError; } - - } finally { - context.popLocalVariables(mark0, catchResultSeq); } - return catchResultSeq; + // Re-throw pending error from try body (if not caught) + if (pendingError != null) { + context.expressionEnd(this); + if (pendingError instanceof XPathException) { + throw (XPathException) pendingError; + } else { + LOG.error(pendingError); + throw new XPathException(this, pendingError); + } + } - } finally { context.expressionEnd(this); } + + return result; } @@ -384,6 +421,13 @@ public void dump(final ExpressionDumper dumper) { dumper.nl().display("}"); dumper.endIndent(); } + if (finallyExpr != null) { + dumper.nl().display("} finally {"); + dumper.startIndent(); + finallyExpr.dump(dumper); + dumper.nl().display("}"); + dumper.endIndent(); + } } /** @@ -428,6 +472,11 @@ public String toString() { result.append(catchExpr.toString()); result.append("}"); } + if (finallyExpr != null) { + result.append(" finally { "); + result.append(finallyExpr.toString()); + result.append("}"); + } return result.toString(); } @@ -436,8 +485,10 @@ public String toString() { */ @Override public int returnsType() { - // fixme! /ljo - return ((Expression) catchClauses.getFirst().getCatchExpr()).returnsType(); + if (!catchClauses.isEmpty()) { + return ((Expression) catchClauses.getFirst().getCatchExpr()).returnsType(); + } + return tryTargetExpr.returnsType(); } /* (non-Javadoc) @@ -451,6 +502,9 @@ public void resetState(final boolean postOptimization) { final Expression catchExpr = (Expression) catchClause.getCatchExpr(); catchExpr.resetState(postOptimization); } + if (finallyExpr != null) { + finallyExpr.resetState(postOptimization); + } } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java b/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java index a56db1a200b..33b781868ed 100644 --- a/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java +++ b/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java @@ -24,8 +24,10 @@ import org.exist.dom.persistent.DocumentSet; import org.exist.dom.QName; import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; import java.util.ArrayList; import java.util.List; @@ -125,31 +127,51 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc } Sequence result = null; try { - QName varName; - LocalVariable var; - int j = 0; - for (int i = 0; i < parameters.size(); i++, j++) { - varName = parameters.get(i); - var = new LocalVariable(varName); - var.setValue(currentArguments[j]); - if (contextDocs != null) { + final SequenceType[] argTypes = getSignature().getArgumentTypes(); + + // Evaluate all argument values first, BEFORE declaring any parameters. + // Default value expressions must be evaluated in the prolog's variable scope, + // not the function body scope (XQ4 spec: default sees variables in scope at + // the function declaration, not other parameters). Context is passed so that + // default values like "." can access the context item at the call site. + final Sequence[] argValues = new Sequence[parameters.size()]; + for (int i = 0; i < parameters.size(); i++) { + if (i < currentArguments.length) { + argValues[i] = currentArguments[i]; + } else if (argTypes[i] instanceof FunctionParameterSequenceType && + ((FunctionParameterSequenceType) argTypes[i]).hasDefaultValue()) { + argValues[i] = ((FunctionParameterSequenceType) argTypes[i]) + .getDefaultValue().eval(contextSequence, contextItem); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Missing required argument $" + parameters.get(i)); + } + } + + // Now declare all parameters with their resolved values + for (int i = 0; i < parameters.size(); i++) { + final QName varName = parameters.get(i); + final LocalVariable var = new LocalVariable(varName); + + var.setValue(argValues[i]); + if (contextDocs != null && i < contextDocs.length) { var.setContextDocs(contextDocs[i]); } context.declareVariableBinding(var); Cardinality actualCardinality; - if (currentArguments[j].isEmpty()) { + if (argValues[i].isEmpty()) { actualCardinality = Cardinality.EMPTY_SEQUENCE; - } else if (currentArguments[j].hasMany()) { + } else if (argValues[i].hasMany()) { actualCardinality = Cardinality._MANY; } else { actualCardinality = Cardinality.EXACTLY_ONE; } - if (!getSignature().getArgumentTypes()[j].getCardinality().isSuperCardinalityOrEqualOf(actualCardinality)) { + if (!argTypes[i].getCardinality().isSuperCardinalityOrEqualOf(actualCardinality)) { throw new XPathException(this, ErrorCodes.XPTY0004, "Invalid cardinality for parameter $" + varName + - ". Expected " + getSignature().getArgumentTypes()[j].getCardinality().getHumanDescription() + - ", got " + currentArguments[j].getItemCount()); + ". Expected " + argTypes[i].getCardinality().getHumanDescription() + + ", got " + argValues[i].getItemCount()); } } result = body.eval(null, null); diff --git a/exist-core/src/main/java/org/exist/xquery/WhileClause.java b/exist-core/src/main/java/org/exist/xquery/WhileClause.java new file mode 100644 index 00000000000..654aaf67fc3 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/WhileClause.java @@ -0,0 +1,136 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; + +import java.util.HashSet; +import java.util.Set; + +/** + * Implements the XQuery 4.0 while clause in FLWOR expressions. + * + *

The while clause evaluates a condition for each tuple in the stream. + * If the condition is true, the tuple is retained; if false, the tuple + * and all subsequent tuples are discarded (iteration stops).

+ */ +public class WhileClause extends AbstractFLWORClause { + + /** + * Lightweight control-flow exception used to signal the immediately + * enclosing for/let binding expression to stop iteration. + */ + public static class WhileTerminationException extends XPathException { + public WhileTerminationException() { + super((Expression) null, "while clause terminated"); + } + } + + /** + * Thread-local flag that signals all enclosing binding expressions + * in the same FLWOR to stop iteration after the current item. + */ + private static final ThreadLocal terminated = ThreadLocal.withInitial(() -> false); + + public static boolean isTerminated() { + return terminated.get(); + } + + public static void clearTerminated() { + terminated.set(false); + } + + private final Expression whileExpr; + + public WhileClause(final XQueryContext context, final Expression whileExpr) { + super(context); + this.whileExpr = whileExpr; + } + + @Override + public ClauseType getType() { + return ClauseType.WHILE; + } + + public Expression getWhileExpr() { + return whileExpr; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); + newContextInfo.setFlags(contextInfo.getFlags() | IN_PREDICATE | IN_WHERE_CLAUSE); + newContextInfo.setContextId(getExpressionId()); + whileExpr.analyze(newContextInfo); + + final AnalyzeContextInfo returnContextInfo = new AnalyzeContextInfo(contextInfo); + returnContextInfo.addFlag(SINGLE_STEP_EXECUTION); + returnExpr.analyze(returnContextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence condResult = whileExpr.eval(null, null); + if (condResult.effectiveBooleanValue()) { + return returnExpr.eval(null, null); + } + terminated.set(true); + throw new WhileTerminationException(); + } + + @Override + public Sequence postEval(final Sequence seq) throws XPathException { + if (returnExpr instanceof FLWORClause flworClause) { + return flworClause.postEval(seq); + } + return super.postEval(seq); + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("while", whileExpr.getLine()); + dumper.startIndent(); + whileExpr.dump(dumper); + dumper.endIndent().nl(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + whileExpr.resetState(postOptimization); + returnExpr.resetState(postOptimization); + } + + @Override + public Set getTupleStreamVariables() { + final Set vars = new HashSet<>(); + final LocalVariable startVar = getStartVariable(); + if (startVar != null) { + vars.add(startVar.getQName()); + } + return vars; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java index b3721c34179..600ef6b336b 100644 --- a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java +++ b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java @@ -1840,7 +1840,7 @@ public void declareFunction(final UserDefinedFunction function) throws XPathExce final QName name = function.getSignature().getName(); final String uri = name.getNamespaceURI(); - if (uri.isEmpty()) { + if (uri.isEmpty() && getXQueryVersion() < 40) { throw new XPathException(function, ErrorCodes.XQST0060, "Every declared function name must have a non-null namespace URI, " + "but function '" + name + "' does not meet this requirement."); @@ -1865,7 +1865,31 @@ public void declareFunction(final UserDefinedFunction function) throws XPathExce @Override public @Nullable UserDefinedFunction resolveFunction(final QName name, final int argCount) { final FunctionId id = new FunctionId(name, argCount); - return declaredFunctions.get(id); + final UserDefinedFunction exact = declaredFunctions.get(id); + if (exact != null) { + return exact; + } + // XQ4: Try to find a function with more params where trailing params have defaults + for (final UserDefinedFunction func : declaredFunctions.values()) { + if (func.getName().equals(name)) { + final SequenceType[] argTypes = func.getSignature().getArgumentTypes(); + if (argTypes.length > argCount) { + // Check that all params from argCount onwards have defaults + boolean allDefaulted = true; + for (int i = argCount; i < argTypes.length; i++) { + if (!(argTypes[i] instanceof FunctionParameterSequenceType) || + !((FunctionParameterSequenceType) argTypes[i]).hasDefaultValue()) { + allDefaulted = false; + break; + } + } + if (allDefaulted) { + return func; + } + } + } + } + return null; } @Override @@ -2730,6 +2754,13 @@ private ExternalModule compileOrBorrowModule(final String namespaceURI, final St * @return The compiled module, or null if the source is not a module * @throws XPathException if the module could not be loaded (XQST0059) or compiled (XPST0003) */ + /** + * Compile a module from a Source. Public wrapper for fn:load-xquery-module content option. + */ + public @Nullable ExternalModule compileModuleFromSource(final String namespaceURI, final Source source) throws XPathException { + return compileModule(namespaceURI, null, "content", source); + } + private @Nullable ExternalModule compileModule(String namespaceURI, final String prefix, final String location, final Source source) throws XPathException { if (LOG.isDebugEnabled()) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayBuild.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayBuild.java new file mode 100644 index 00000000000..bcf73834e61 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayBuild.java @@ -0,0 +1,87 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * array:build($seq, $fn?) — Build array from sequence with optional mapping function. + */ +public class ArrayBuild extends BasicFunction { + + private AnalyzeContextInfo cachedContextInfo; + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("build", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Builds an array from the items of a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The resulting array")), + new FunctionSignature( + new QName("build", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Builds an array by applying a function to each item of a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The function to apply") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The resulting array")) + }; + + public ArrayBuild(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final List members = new ArrayList<>(); + + if (getArgumentCount() == 2) { + try (final FunctionReference fn = (FunctionReference) args[1].itemAt(0)) { + fn.analyze(cachedContextInfo); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + members.add(fn.evalFunction(null, null, new Sequence[]{item.toSequence()})); + } + } + } else { + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + members.add(i.nextItem().toSequence()); + } + } + + return new ArrayType(context, members); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java index ae46633a144..60da2774cb0 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java @@ -27,11 +27,13 @@ import org.exist.xquery.BasicFunction; import org.exist.xquery.Cardinality; import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Expression; import org.exist.xquery.FunctionSignature; import org.exist.xquery.NamedFunctionReference; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.functions.fn.FunData; +import org.exist.xquery.value.BooleanValue; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReference; import org.exist.xquery.value.FunctionReturnSequenceType; @@ -43,6 +45,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -276,6 +279,38 @@ public class ArrayFunction extends BasicFunction { ) ); + // --- XQuery 4.0 array functions --- + public static final FunctionSignature EMPTY = functionSignature( + Fn.EMPTY.fname, + "Returns true if the supplied array is empty.", + returns(Type.BOOLEAN, "true if the array is empty"), + INPUT_ARRAY + ); + public static final FunctionSignature FOOT = functionSignature( + Fn.FOOT.fname, + "Returns the last member of an array.", + returnsOptMany(Type.ITEM, "The last member of the array"), + INPUT_ARRAY + ); + public static final FunctionSignature TRUNK = functionSignature( + Fn.TRUNK.fname, + "Returns all members except the last from a supplied array.", + returns(Type.ARRAY_ITEM, "All members except the last"), + INPUT_ARRAY + ); + public static final FunctionSignature ITEMS = functionSignature( + Fn.ITEMS.fname, + "Returns the members of an array as a sequence.", + returnsOptMany(Type.ITEM, "The members as a sequence"), + INPUT_ARRAY + ); + public static final FunctionSignature MEMBERS = functionSignature( + Fn.MEMBERS.fname, + "Returns each member of an array as a single-member array.", + returns(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of single-member arrays"), + INPUT_ARRAY + ); + private AnalyzeContextInfo cachedContextInfo; public ArrayFunction(XQueryContext context, FunctionSignature signature) { @@ -314,6 +349,11 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce case FOR_EACH_PAIR -> forEachPair(args); case SORT -> sort(args); case FLATTEN -> flatten(args); + case EMPTY -> empty(args); + case FOOT -> foot(args); + case TRUNK -> trunk(args); + case ITEMS -> items(args); + case MEMBERS -> members(args); }; } @@ -486,6 +526,49 @@ private static ValueSequence flatten(Sequence[] args) throws XPathException { return result; } + // --- XQuery 4.0 array function implementations --- + + private static Sequence empty(Sequence[] args) { + final ArrayType array = (ArrayType) args[0].itemAt(0); + return BooleanValue.valueOf(array.getSize() == 0); + } + + private static Sequence foot(Sequence[] args) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + if (array.getSize() == 0) { + throw new XPathException((Expression) null, ErrorCodes.FOAY0001, "Array is empty"); + } + return array.get(array.getSize() - 1); + } + + private static ArrayType trunk(Sequence[] args) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + if (array.getSize() == 0) { + return array; + } + return array.remove(array.getSize() - 1); + } + + private static Sequence items(Sequence[] args) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final ValueSequence result = new ValueSequence(array.getSize()); + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + result.addAll(member); + } + return result; + } + + private Sequence members(Sequence[] args) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final ValueSequence result = new ValueSequence(array.getSize()); + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + result.add(new ArrayType(this, context, Collections.singletonList(member))); + } + return result; + } + private Sequence getFunction(Sequence arg, FunctionE action) throws XPathException { try (final FunctionReference ref = (FunctionReference) arg.itemAt(0)) { ref.analyze(cachedContextInfo); @@ -511,7 +594,12 @@ private enum Fn { FOLD_RIGHT("fold-right"), FOR_EACH_PAIR("for-each-pair"), SORT("sort"), - FLATTEN("flatten"); + FLATTEN("flatten"), + EMPTY("empty"), + FOOT("foot"), + TRUNK("trunk"), + ITEMS("items"), + MEMBERS("members"); final static Map fnMap = new HashMap<>(); private final String fname; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexOf.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexOf.java new file mode 100644 index 00000000000..c57c93532cf --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexOf.java @@ -0,0 +1,63 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.fn.FunDeepEqual; +import org.exist.xquery.value.*; + +/** + * array:index-of($array, $target) — Returns positions of matching members. + */ +public class ArrayIndexOf extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("index-of", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns the positions of members that are deep-equal to the target.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The array to search"), + new FunctionParameterSequenceType("target", Type.ITEM, Cardinality.ZERO_OR_MORE, "The value to search for") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, "The 1-based positions")) + }; + + public ArrayIndexOf(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final Sequence target = args[1]; + final ValueSequence result = new ValueSequence(); + + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + if (FunDeepEqual.deepEqualsSeq(member, target, null)) { + result.add(new IntegerValue(this, i + 1)); + } + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexWhere.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexWhere.java new file mode 100644 index 00000000000..78d3b359b12 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexWhere.java @@ -0,0 +1,105 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements array:index-where (XQuery 4.0). + * + * Returns the positions in an input array of members that match a supplied + * predicate function, as a sequence of integers in ascending order. + */ +public class ArrayIndexWhere extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("index-where", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns positions of array members matching the predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "The predicate function") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, + "positions of matching members")) + }; + + public ArrayIndexWhere(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int size = array.getSize(); + if (size == 0) { + return Sequence.EMPTY_SEQUENCE; + } + + try (final FunctionReference func = (FunctionReference) args[1].itemAt(0)) { + func.analyze(cachedContextInfo); + + final int arity = func.getSignature().getArgumentCount(); + final ValueSequence result = new ValueSequence(); + + for (int i = 0; i < size; i++) { + final Sequence member = array.get(i); + final Sequence[] funcArgs; + if (arity >= 2) { + funcArgs = new Sequence[] { member, new IntegerValue(this, i + 1) }; + } else { + funcArgs = new Sequence[] { member }; + } + + final Sequence predResult = func.evalFunction(null, null, funcArgs); + if (!predResult.isEmpty() && predResult.effectiveBooleanValue()) { + result.add(new IntegerValue(this, i + 1)); + } + } + return result; + } + } + + private org.exist.xquery.AnalyzeContextInfo cachedContextInfo = + new org.exist.xquery.AnalyzeContextInfo(); + + @Override + public void analyze(org.exist.xquery.AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new org.exist.xquery.AnalyzeContextInfo(contextInfo); + super.analyze(contextInfo); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java index a9eec0d3db9..e83eebf1d4b 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java @@ -43,28 +43,68 @@ public class ArrayModule extends AbstractInternalModule { public static final String PREFIX = "array"; private static final FunctionDef[] functions = functionDefs( - ArrayFunction.class, - ArrayFunction.SIZE, - ArrayFunction.GET, - ArrayFunction.PUT, - ArrayFunction.APPEND, - ArrayFunction.SUBARRAY_1, - ArrayFunction.SUBARRAY_2, - ArrayFunction.REMOVE, - ArrayFunction.INSERT_BEFORE, - ArrayFunction.HEAD, - ArrayFunction.TAIL, - ArrayFunction.REVERSE, - ArrayFunction.JOIN, - ArrayFunction.FOR_EACH, - ArrayFunction.FILTER, - ArrayFunction.FOLD_LEFT, - ArrayFunction.FOLD_RIGHT, - ArrayFunction.FOR_EACH_PAIR, - ArrayFunction.SORT_1, - ArrayFunction.SORT_2, - ArrayFunction.SORT_3, - ArrayFunction.FLATTEN + functionDefs( + ArrayFunction.class, + ArrayFunction.SIZE, + ArrayFunction.GET, + ArrayFunction.PUT, + ArrayFunction.APPEND, + ArrayFunction.SUBARRAY_1, + ArrayFunction.SUBARRAY_2, + ArrayFunction.REMOVE, + ArrayFunction.INSERT_BEFORE, + ArrayFunction.HEAD, + ArrayFunction.TAIL, + ArrayFunction.REVERSE, + ArrayFunction.JOIN, + ArrayFunction.FOR_EACH, + ArrayFunction.FILTER, + ArrayFunction.FOLD_LEFT, + ArrayFunction.FOLD_RIGHT, + ArrayFunction.FOR_EACH_PAIR, + ArrayFunction.SORT_1, + ArrayFunction.SORT_2, + ArrayFunction.SORT_3, + ArrayFunction.FLATTEN, + // XQuery 4.0 array functions + ArrayFunction.EMPTY, + ArrayFunction.FOOT, + ArrayFunction.TRUNK, + ArrayFunction.ITEMS, + ArrayFunction.MEMBERS + ), + functionDefs( + ArraySlice.class, + ArraySlice.signatures + ), + functionDefs( + ArrayIndexWhere.class, + ArrayIndexWhere.signatures + ), + functionDefs( + ArraySortWith.class, + ArraySortWith.signatures + ), + functionDefs( + ArraySortBy.class, + ArraySortBy.signatures + ), + functionDefs( + ArrayBuild.class, + ArrayBuild.signatures + ), + functionDefs( + ArrayIndexOf.class, + ArrayIndexOf.signatures + ), + functionDefs( + ArrayOfMembers.class, + ArrayOfMembers.signatures + ), + functionDefs( + ArraySplit.class, + ArraySplit.signatures + ) ); public ArrayModule(Map> parameters) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayOfMembers.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayOfMembers.java new file mode 100644 index 00000000000..7e0ef9d7cab --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayOfMembers.java @@ -0,0 +1,62 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * array:of-members($input as map(xs:string, item()*)*) — Construct array from member maps. + * Inverse of array:members. + */ +public class ArrayOfMembers extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("of-members", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Constructs an array from a sequence of member maps (each with a 'value' key).", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, "The member maps") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The resulting array")) + }; + + public ArrayOfMembers(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final List members = new ArrayList<>(); + for (final SequenceIterator i = args[0].iterate(); i.hasNext(); ) { + final AbstractMapType map = (AbstractMapType) i.nextItem(); + final Sequence value = map.get(new StringValue("value")); + members.add(value != null ? value : Sequence.EMPTY_SEQUENCE); + } + return new ArrayType(context, members); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySlice.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySlice.java new file mode 100644 index 00000000000..e5037030e97 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySlice.java @@ -0,0 +1,145 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements array:slice (XQuery 4.0). + * + * Returns an array containing selected members of a supplied input array + * based on their position. Supports negative indexing and step values + * (Python-style slicing with 1-based indexing). + */ +public class ArraySlice extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")), + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")), + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position"), + new FunctionParameterSequenceType("end", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The end position") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")), + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position"), + new FunctionParameterSequenceType("end", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The end position"), + new FunctionParameterSequenceType("step", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The step value") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")) + }; + + public ArraySlice(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int count = array.getSize(); + + if (count == 0) { + return new ArrayType(this, context, new ArrayList<>()); + } + + // Resolve start + int s; + if (args.length < 2 || args[1].isEmpty() || ((IntegerValue) args[1].itemAt(0)).getLong() == 0) { + s = 1; + } else { + final long sv = ((IntegerValue) args[1].itemAt(0)).getLong(); + s = (int) (sv < 0 ? count + sv + 1 : sv); + } + + // Resolve end + int e; + if (args.length < 3 || args[2].isEmpty() || ((IntegerValue) args[2].itemAt(0)).getLong() == 0) { + e = count; + } else { + final long ev = ((IntegerValue) args[2].itemAt(0)).getLong(); + e = (int) (ev < 0 ? count + ev + 1 : ev); + } + + // Resolve step + int step; + if (args.length < 4 || args[3].isEmpty() || ((IntegerValue) args[3].itemAt(0)).getLong() == 0) { + step = (e >= s) ? 1 : -1; + } else { + step = (int) ((IntegerValue) args[3].itemAt(0)).getLong(); + } + + // Handle negative step: reverse array and recurse with negated positions + if (step < 0) { + final ArrayType reversed = array.reverse(); + final Sequence[] newArgs = new Sequence[4]; + newArgs[0] = reversed; + newArgs[1] = new IntegerValue(this, -s); + newArgs[2] = new IntegerValue(this, -e); + newArgs[3] = new IntegerValue(this, -step); + return eval(newArgs, contextSequence); + } + + // Positive step: select members + final List result = new ArrayList<>(); + for (int pos = s; pos <= e && pos <= count; pos += step) { + if (pos >= 1) { + result.add(array.get(pos - 1)); + } + } + return new ArrayType(this, context, result); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortBy.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortBy.java new file mode 100644 index 00000000000..bf16e1d9f6a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortBy.java @@ -0,0 +1,215 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.fn.FunCompare; +import org.exist.xquery.functions.fn.FunData; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; +import org.exist.xquery.NamedFunctionReference; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements array:sort-by (XQuery 4.0). + * + * Sorts a supplied array based on the value of sort keys supplied as + * record (map) specifications with optional key, collation, and order fields. + */ +public class ArraySortBy extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("sort-by", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Sorts the array based on sort key specifications.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("keys", Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, + "Sort key records with optional key, collation, and order fields") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sorted array")) + }; + + private AnalyzeContextInfo cachedContextInfo = new AnalyzeContextInfo(); + + public ArraySortBy(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int size = array.getSize(); + if (size <= 1) { + return array; + } + + final Sequence keys = args[1]; + + // Parse sort key specifications + final List sortKeys = new ArrayList<>(); + if (keys.isEmpty()) { + final SortKey defaultKey = new SortKey(); + defaultKey.collator = context.getDefaultCollator(); + sortKeys.add(defaultKey); + } else { + for (final SequenceIterator ki = keys.iterate(); ki.hasNext(); ) { + final AbstractMapType keyMap = (AbstractMapType) ki.nextItem(); + sortKeys.add(parseSortKey(keyMap)); + } + } + + // Pre-compute sort keys for each member + final Sequence[][] keyValues = new Sequence[size][sortKeys.size()]; + for (int idx = 0; idx < size; idx++) { + final Sequence member = array.get(idx); + for (int k = 0; k < sortKeys.size(); k++) { + final SortKey sk = sortKeys.get(k); + if (sk.keyFunction != null) { + keyValues[idx][k] = sk.keyFunction.evalFunction(null, null, + new Sequence[]{member}); + } else { + // Default: atomize members + final ValueSequence atomized = new ValueSequence(); + for (final SequenceIterator mi = member.iterate(); mi.hasNext(); ) { + atomized.add(mi.nextItem().atomize()); + } + keyValues[idx][k] = atomized; + } + } + } + + // Build index array for stable sort + final Integer[] indices = new Integer[size]; + for (int i = 0; i < indices.length; i++) { + indices[i] = i; + } + + try { + java.util.Arrays.sort(indices, (a, b) -> { + try { + for (int k = 0; k < sortKeys.size(); k++) { + final SortKey sk = sortKeys.get(k); + final int cmp = compareKeys(keyValues[a][k], keyValues[b][k], sk.collator); + if (cmp != 0) { + return sk.descending ? -cmp : cmp; + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + // Build result array + final List resultMembers = new ArrayList<>(size); + for (final int idx : indices) { + resultMembers.add(array.get(idx)); + } + return new ArrayType(this, context, resultMembers); + } + + private int compareKeys(final Sequence a, final Sequence b, final Collator collator) throws XPathException { + final boolean emptyA = a.isEmpty(); + final boolean emptyB = b.isEmpty(); + if (emptyA && emptyB) return 0; + if (emptyA) return -1; + if (emptyB) return 1; + + final int len = Math.min(a.getItemCount(), b.getItemCount()); + for (int i = 0; i < len; i++) { + final AtomicValue va = a.itemAt(i).atomize(); + final AtomicValue vb = b.itemAt(i).atomize(); + final int cmp = FunCompare.compare(va, vb, collator); + if (cmp != 0) return cmp; + } + return Integer.compare(a.getItemCount(), b.getItemCount()); + } + + private SortKey parseSortKey(final AbstractMapType map) throws XPathException { + final SortKey sk = new SortKey(); + + final Sequence keySeq = map.get(new StringValue(this, "key")); + if (keySeq != null && !keySeq.isEmpty()) { + final Item keyItem = keySeq.itemAt(0); + if (!(keyItem instanceof FunctionReference)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Expected function reference for 'key', got " + Type.getTypeName(keyItem.getType())); + } + sk.keyFunction = (FunctionReference) keyItem; + sk.keyFunction.analyze(cachedContextInfo); + } + + final Sequence collSeq = map.get(new StringValue(this, "collation")); + if (collSeq != null && !collSeq.isEmpty()) { + sk.collator = context.getCollator(collSeq.getStringValue(), ErrorCodes.FOCH0002); + } else { + sk.collator = context.getDefaultCollator(); + } + + final Sequence orderSeq = map.get(new StringValue(this, "order")); + if (orderSeq != null && !orderSeq.isEmpty()) { + sk.descending = "descending".equals(orderSeq.getStringValue()); + } + + return sk; + } + + private static class SortKey { + FunctionReference keyFunction; + Collator collator; + boolean descending; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortWith.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortWith.java new file mode 100644 index 00000000000..06f48b5fd44 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortWith.java @@ -0,0 +1,144 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements array:sort-with (XQuery 4.0). + * + * Sorts a supplied array according to the order induced by one or more + * supplied comparator functions. Sort is stable. + */ +public class ArraySortWith extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("sort-with", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Sorts the array using the supplied comparator function(s).", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("comparators", Type.FUNCTION, Cardinality.ONE_OR_MORE, + "One or more comparator functions (fn(item()*, item()*) as xs:integer)") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sorted array")) + }; + + private AnalyzeContextInfo cachedContextInfo = new AnalyzeContextInfo(); + + public ArraySortWith(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int size = array.getSize(); + if (size <= 1) { + return array; + } + + // Collect comparator functions + final Sequence comparatorsSeq = args[1]; + final List comparators = new ArrayList<>(comparatorsSeq.getItemCount()); + for (final SequenceIterator it = comparatorsSeq.iterate(); it.hasNext(); ) { + final FunctionReference ref = (FunctionReference) it.nextItem(); + ref.analyze(cachedContextInfo); + comparators.add(ref); + } + + // Build list of (index, member) to sort + final List members = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + members.add(new IndexedMember(i, array.get(i))); + } + + // Stable sort using comparator chain + try { + members.sort((a, b) -> { + try { + for (final FunctionReference comp : comparators) { + final Sequence[] funcArgs = new Sequence[] { a.value, b.value }; + final Sequence result = comp.evalFunction(null, null, funcArgs); + if (result.isEmpty()) { + continue; + } + final long cmp = ((IntegerValue) result.itemAt(0).convertTo(Type.INTEGER)).getLong(); + if (cmp != 0) { + return cmp < 0 ? -1 : 1; + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + // Build result array + final List resultMembers = new ArrayList<>(size); + for (final IndexedMember m : members) { + resultMembers.add(m.value); + } + + return new ArrayType(this, context, resultMembers); + } + + private static class IndexedMember { + final int index; + final Sequence value; + + IndexedMember(int index, Sequence value) { + this.index = index; + this.value = value; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySplit.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySplit.java new file mode 100644 index 00000000000..25d231d64d7 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySplit.java @@ -0,0 +1,58 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.util.Collections; + +/** + * array:split($array) — Split array into sequence of single-member arrays. + */ +public class ArraySplit extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("split", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Splits an array into a sequence of single-member arrays.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The array to split") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of single-member arrays")) + }; + + public ArraySplit(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final ValueSequence result = new ValueSequence(array.getSize()); + for (int i = 0; i < array.getSize(); i++) { + result.add(new ArrayType(context, Collections.singletonList(array.get(i)))); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayType.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayType.java index aca00074793..883552aa869 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayType.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayType.java @@ -103,11 +103,22 @@ public Sequence get(int n) { @Override public Sequence get(final AtomicValue key) throws XPathException { - if (!Type.subTypeOf(key.getType(), Type.INTEGER)) { + final int pos; + if (Type.subTypeOf(key.getType(), Type.INTEGER)) { + pos = ((IntegerValue) key).getInt(); + } else if (Type.subTypeOf(key.getType(), Type.DECIMAL) || key.getType() == Type.DOUBLE || key.getType() == Type.FLOAT) { + // XQ4: numeric types (decimal, double, float) are accepted if they are whole numbers + final NumericValue numVal = (NumericValue) key; + if (!numVal.hasFractionalPart()) { + pos = numVal.getInt(); + } else { + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, + "Position argument for array lookup must be a whole number, got: " + key.getStringValue()); + } + } else { throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "Position argument for array lookup must be a positive integer"); } - final int pos = ((IntegerValue) key).getInt(); if (pos <= 0 || pos > getSize()) { final String startIdx = vector.length() == 0 ? "0" : "1"; final String endIdx = String.valueOf(vector.length()); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvFunctions.java new file mode 100644 index 00000000000..ea4a275ba56 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvFunctions.java @@ -0,0 +1,619 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.apache.commons.io.IOUtils; +import org.exist.dom.QName; +import org.exist.dom.memtree.MemTreeBuilder; +import org.exist.security.PermissionDeniedException; +import org.exist.source.FileSource; +import org.exist.source.Source; +import org.exist.source.SourceFactory; +import org.exist.xquery.*; +import org.exist.xquery.functions.array.ArrayType; +import io.lacuna.bifurcan.IEntry; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * Implements XQuery 4.0 CSV functions: + * fn:csv-to-arrays, fn:parse-csv, fn:csv-to-xml, fn:csv-doc. + */ +public class CsvFunctions extends BasicFunction { + + // XQ4 namespace for CSV XML output + private static final String CSV_NS = "http://www.w3.org/2005/xpath-functions"; + + // fn:csv-to-arrays signatures + public static final FunctionSignature[] FN_CSV_TO_ARRAYS = { + new FunctionSignature( + new QName("csv-to-arrays", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a sequence of arrays.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of arrays, one per row")), + new FunctionSignature( + new QName("csv-to-arrays", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a sequence of arrays, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of arrays, one per row")) + }; + + // fn:parse-csv signatures + public static final FunctionSignature[] FN_PARSE_CSV = { + new FunctionSignature( + new QName("parse-csv", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a map.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")), + new FunctionSignature( + new QName("parse-csv", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a map, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")) + }; + + // fn:csv-to-xml signatures + public static final FunctionSignature[] FN_CSV_TO_XML = { + new FunctionSignature( + new QName("csv-to-xml", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as an XML document.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, "An XML document representing the CSV data")), + new FunctionSignature( + new QName("csv-to-xml", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as an XML document, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, "An XML document representing the CSV data")) + }; + + // fn:csv-doc signatures + public static final FunctionSignature[] FN_CSV_DOC = { + new FunctionSignature( + new QName("csv-doc", Function.BUILTIN_FUNCTION_NS), + "Reads CSV data from the specified URI and returns the result as a map.", + new SequenceType[]{ + new FunctionParameterSequenceType("uri", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI of the CSV resource") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")), + new FunctionSignature( + new QName("csv-doc", Function.BUILTIN_FUNCTION_NS), + "Reads CSV data from the specified URI and returns the result as a map, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("uri", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI of the CSV resource"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")) + }; + + public CsvFunctions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("csv-doc")) { + return evalCsvDoc(args); + } + + // Empty sequence input returns empty + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String csv = args[0].getStringValue(); + final CsvParser.CsvOptions options = parseOptions(args); + + if (isCalledAs("csv-to-arrays")) { + return evalCsvToArrays(csv, options); + } else if (isCalledAs("parse-csv")) { + return evalParseCsv(csv, options); + } else if (isCalledAs("csv-to-xml")) { + return evalCsvToXml(csv, options); + } + throw new XPathException(this, ErrorCodes.XPST0017, "Unknown CSV function: " + getSignature().getName().getLocalPart()); + } + + // ==================== fn:csv-to-arrays ==================== + + private Sequence evalCsvToArrays(final String csv, final CsvParser.CsvOptions options) throws XPathException { + options.validate(this); + final CsvParser parser = new CsvParser(options, this); + final ValueSequence result = new ValueSequence(); + + parser.parse(csv, new CsvParser.CsvConverter() { + @Override + public void header(final List fields) { + // Header row is also returned as an array in csv-to-arrays + // (per XQ4 spec: "If header is true, the first row is treated as a header + // but still appears in the output") + // Actually per spec: if header=true, the header row is NOT included + // in the result of csv-to-arrays. + } + + @Override + public void record(final List fields) throws XPathException { + result.add(fieldsToArray(fields)); + } + + @Override + public void finish() { + } + }); + return result; + } + + // ==================== fn:parse-csv ==================== + + private Sequence evalParseCsv(final String csv, final CsvParser.CsvOptions options) throws XPathException { + options.validate(this); + final CsvParser parser = new CsvParser(options, this); + final List> allRows = new ArrayList<>(); + final List[] headerHolder = new List[]{null}; + + parser.parse(csv, new CsvParser.CsvConverter() { + @Override + public void header(final List fields) { + headerHolder[0] = fields; + } + + @Override + public void record(final List fields) { + allRows.add(fields); + } + + @Override + public void finish() { + } + }); + + // Explicit header from options overrides parsed header + final List effectiveHeader = options.explicitHeader != null + ? options.explicitHeader : headerHolder[0]; + + return buildParseCsvResult(effectiveHeader, allRows, options); + } + + private Sequence buildParseCsvResult(final List header, final List> rows, + final CsvParser.CsvOptions options) throws XPathException { + final MapType result = new MapType(this, context); + + // "columns" - sequence of column names (empty sequence if no header) + final Sequence columns; + if (header != null) { + final ValueSequence colSeq = new ValueSequence(header.size()); + for (final String h : header) { + colSeq.add(new StringValue(this, h)); + } + columns = colSeq; + } else { + columns = Sequence.EMPTY_SEQUENCE; + } + + // "column-index" - map from column name to 1-based position + // Empty names are excluded; duplicate names map to first occurrence + final MapType columnIndex = new MapType(this, context); + MapType colIdxResult = columnIndex; + if (header != null) { + final java.util.Set seen = new java.util.HashSet<>(); + for (int i = 0; i < header.size(); i++) { + final String name = header.get(i); + if (!name.isEmpty() && seen.add(name)) { + colIdxResult = (MapType) colIdxResult.put(new StringValue(this, name), + new IntegerValue(this, i + 1)); + } + } + } + + // "rows" - sequence of arrays + final ValueSequence rowSeq = new ValueSequence(rows.size()); + for (final List row : rows) { + rowSeq.add(fieldsToArray(row)); + } + + // Build the result map + MapType map = (MapType) result.put(new StringValue(this, "columns"), columns); + map = (MapType) map.put(new StringValue(this, "column-index"), colIdxResult); + map = (MapType) map.put(new StringValue(this, "rows"), rowSeq); + + // "get" - accessor function: fn($row as xs:integer, $column as item()) as xs:string + // $column can be an integer (1-based) or a string (column name) + final UserDefinedFunction getFunc = new UserDefinedFunction(context, + new FunctionSignature( + new QName("get", Function.BUILTIN_FUNCTION_NS), + null, + new SequenceType[]{ + new FunctionParameterSequenceType("row", Type.INTEGER, Cardinality.EXACTLY_ONE, "Row number (1-based)"), + new FunctionParameterSequenceType("column", Type.ITEM, Cardinality.EXACTLY_ONE, "Column number (1-based) or column name") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_ONE, "The field value"))); + getFunc.addVariable("row"); + getFunc.addVariable("column"); + getFunc.setFunctionBody(new CsvGetExpression(context, rowSeq, header)); + final FunctionCall getCall = new FunctionCall(context, getFunc); + getCall.setLocation(getLine(), getColumn()); + final FunctionReference getFuncRef = new FunctionReference(this, getCall); + map = (MapType) map.put(new StringValue(this, "get"), getFuncRef); + + return map; + } + + // ==================== fn:csv-to-xml ==================== + + private Sequence evalCsvToXml(final String csv, final CsvParser.CsvOptions options) throws XPathException { + options.validate(this); + final CsvParser parser = new CsvParser(options, this); + + final List[] headerHolder = new List[]{null}; + final List> allRecords = new ArrayList<>(); + + parser.parse(csv, new CsvParser.CsvConverter() { + @Override + public void header(final List fields) { + headerHolder[0] = fields; + } + + @Override + public void record(final List fields) { + allRecords.add(fields); + } + + @Override + public void finish() { + } + }); + + // Explicit header from options overrides parsed header + final List effectiveHeader = options.explicitHeader != null + ? options.explicitHeader : headerHolder[0]; + + context.pushDocumentContext(); + try { + final MemTreeBuilder builder = context.getDocumentBuilder(); + + builder.startElement(new QName("csv", CSV_NS), null); + + // Write columns element only if headers are present + if (effectiveHeader != null) { + builder.startElement(new QName("columns", CSV_NS), null); + for (final String col : effectiveHeader) { + builder.startElement(new QName("column", CSV_NS), null); + builder.characters(col); + builder.endElement(); + } + builder.endElement(); // + } + + // Write rows + builder.startElement(new QName("rows", CSV_NS), null); + for (final List record : allRecords) { + builder.startElement(new QName("row", CSV_NS), null); + // A row with a single empty field is an empty row (no field elements) + final boolean isEmptyRow = record.size() == 1 && record.get(0).isEmpty(); + if (!isEmptyRow) { + for (int f = 0; f < record.size(); f++) { + final String field = record.get(f); + builder.startElement(new QName("field", CSV_NS), null); + if (effectiveHeader != null && f < effectiveHeader.size() + && !effectiveHeader.get(f).isEmpty()) { + builder.addAttribute(new QName("column", null, null), effectiveHeader.get(f)); + } + if (!field.isEmpty()) { + builder.characters(field); + } + builder.endElement(); + } + } + builder.endElement(); // + } + builder.endElement(); // + + builder.endElement(); // + + return builder.getDocument(); + } finally { + context.popDocumentContext(); + } + } + + // ==================== fn:csv-doc ==================== + + private Sequence evalCsvDoc(final Sequence[] args) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String uri = args[0].getStringValue(); + + // Read the CSV content from the URI (same approach as fn:unparsed-text) + final String csvContent; + try { + final URI parsedUri = new URI(uri); + if (parsedUri.getFragment() != null) { + throw new XPathException(this, ErrorCodes.FODC0005, + "URI may not contain a fragment identifier: " + uri); + } + final Source source = SourceFactory.getSource(context.getBroker(), "", parsedUri.toASCIIString(), false); + if (source == null) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Could not find CSV resource: " + uri); + } + if (source instanceof FileSource && !context.getBroker().getCurrentSubject().hasDbaRole()) { + throw new PermissionDeniedException("non-dba user not allowed to read from file system"); + } + final StringWriter output = new StringWriter(); + try (final InputStream is = source.getInputStream()) { + IOUtils.copy(is, output, StandardCharsets.UTF_8); + } + csvContent = output.toString(); + } catch (final IOException | PermissionDeniedException | URISyntaxException e) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Error reading CSV resource: " + uri + " - " + e.getMessage()); + } + + final CsvParser.CsvOptions options = parseOptions(args); + return evalParseCsv(csvContent, options); + } + + // ==================== Shared utilities ==================== + + private CsvParser.CsvOptions parseOptions(final Sequence[] args) throws XPathException { + final CsvParser.CsvOptions options = new CsvParser.CsvOptions(); + if (args.length < 2 || args[1].isEmpty()) { + return options; + } + + final AbstractMapType map = (AbstractMapType) args[1].itemAt(0); + + // field-delimiter + final Sequence fdSeq = map.get(new StringValue(this, "field-delimiter")); + if (fdSeq != null && !fdSeq.isEmpty()) { + final String fd = fdSeq.getStringValue(); + if (fd.isEmpty()) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "field-delimiter must be a single character"); + } + if (fd.codePointCount(0, fd.length()) != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "field-delimiter must be a single character, got: \"" + fd + "\""); + } + options.fieldDelimiter = fd.codePointAt(0); + } + + // row-delimiter + final Sequence rdSeq = map.get(new StringValue(this, "row-delimiter")); + if (rdSeq != null && !rdSeq.isEmpty()) { + if (rdSeq.getItemCount() != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "row-delimiter must be a single string, got " + rdSeq.getItemCount() + " items"); + } + final String rd = rdSeq.itemAt(0).getStringValue(); + if (rd.isEmpty() || rd.codePointCount(0, rd.length()) != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "row-delimiter must be a single character"); + } + options.rowDelimiter = rd.codePointAt(0); + } + + // quote-character + final Sequence qcSeq = map.get(new StringValue(this, "quote-character")); + if (qcSeq != null && !qcSeq.isEmpty()) { + final String qc = qcSeq.getStringValue(); + if (qc.isEmpty()) { + options.quoteChar = -1; // disable quoting + } else if (qc.codePointCount(0, qc.length()) != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "quote-character must be a single character or empty string"); + } else { + options.quoteChar = qc.codePointAt(0); + } + } + + // trim-whitespace + final Sequence twSeq = map.get(new StringValue(this, "trim-whitespace")); + if (twSeq != null && !twSeq.isEmpty()) { + options.trimWhitespace = twSeq.effectiveBooleanValue(); + } + + // header: boolean, "present", or sequence of explicit column names + final Sequence hdrSeq = map.get(new StringValue(this, "header")); + if (hdrSeq != null && !hdrSeq.isEmpty()) { + final Item hdrItem = hdrSeq.itemAt(0); + if (hdrItem.getType() == Type.BOOLEAN) { + options.hasHeader = hdrItem.toSequence().effectiveBooleanValue(); + } else if (hdrSeq.getItemCount() == 1) { + final String hdrStr = hdrItem.getStringValue(); + if ("true".equals(hdrStr) || "present".equals(hdrStr)) { + options.hasHeader = true; + } else if ("false".equals(hdrStr) || "absent".equals(hdrStr)) { + options.hasHeader = false; + } else { + // Single string → explicit column name + options.explicitHeader = new ArrayList<>(); + options.explicitHeader.add(hdrStr); + options.hasHeader = false; // don't consume first data row + } + } else { + // Multiple items → sequence of explicit column names + options.explicitHeader = new ArrayList<>(hdrSeq.getItemCount()); + for (int j = 0; j < hdrSeq.getItemCount(); j++) { + options.explicitHeader.add(hdrSeq.itemAt(j).getStringValue()); + } + options.hasHeader = false; // don't consume first data row + } + } + + // select-columns + final Sequence scSeq = map.get(new StringValue(this, "select-columns")); + if (scSeq != null && !scSeq.isEmpty()) { + final int count = scSeq.getItemCount(); + options.selectColumns = new int[count]; + for (int j = 0; j < count; j++) { + final int col = ((IntegerValue) scSeq.itemAt(j).convertTo(Type.INTEGER)).getInt(); + if (col < 1) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "select-columns values must be positive integers, got: " + col); + } + options.selectColumns[j] = col; + } + } + + // trim-rows + final Sequence trSeq = map.get(new StringValue(this, "trim-rows")); + if (trSeq != null && !trSeq.isEmpty()) { + options.trimRows = trSeq.effectiveBooleanValue(); + } + + // Validate no unknown option keys + final java.util.Set knownKeys = java.util.Set.of( + "field-delimiter", "row-delimiter", "quote-character", + "trim-whitespace", "header", "select-columns", "trim-rows"); + for (final IEntry entry : map) { + final String key = entry.key().getStringValue(); + if (!knownKeys.contains(key)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Unknown CSV option: '" + key + "'"); + } + } + + return options; + } + + private ArrayType fieldsToArray(final List fields) throws XPathException { + // XQ4 spec: a row with a single empty field produces an empty array + if (fields.size() == 1 && fields.get(0).isEmpty()) { + return new ArrayType(this, context, new ArrayList<>()); + } + final List items = new ArrayList<>(fields.size()); + for (final String field : fields) { + items.add(new StringValue(this, field)); + } + return new ArrayType(this, context, items); + } + + /** + * Expression body for the "get" accessor function in fn:parse-csv results. + * Implements fn($row as xs:integer, $column as xs:integer) as xs:string. + * Both row and column are 1-based indexes. + */ + private static class CsvGetExpression extends AbstractExpression { + + private final ValueSequence rows; + private final List header; + + public CsvGetExpression(final XQueryContext context, final ValueSequence rows, final List header) { + super(context); + this.rows = rows; + this.header = header; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence rowIdxSeq = context.resolveVariable("row").getValue(); + final Sequence colSeq = context.resolveVariable("column").getValue(); + + if (rowIdxSeq.isEmpty() || colSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final int rowIdx = ((IntegerValue) rowIdxSeq.itemAt(0).convertTo(Type.INTEGER)).getInt(); + + if (rowIdx < 1 || rowIdx > rows.getItemCount()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Resolve column: integer index or string name + final Item colItem = colSeq.itemAt(0); + final int colIdx; + if (Type.subTypeOf(colItem.getType(), Type.INTEGER)) { + colIdx = ((IntegerValue) colItem.convertTo(Type.INTEGER)).getInt(); + } else { + // String column name — look up in header + final String colName = colItem.getStringValue(); + if (header == null) { + return Sequence.EMPTY_SEQUENCE; + } + int found = -1; + for (int i = 0; i < header.size(); i++) { + if (header.get(i).equals(colName)) { + found = i + 1; // 1-based + break; + } + } + if (found == -1) { + return Sequence.EMPTY_SEQUENCE; + } + colIdx = found; + } + + final ArrayType row = (ArrayType) rows.itemAt(rowIdx - 1); + if (colIdx < 1 || colIdx > row.getSize()) { + return Sequence.EMPTY_SEQUENCE; + } + + return row.get(colIdx - 1); + } + + @Override + public int returnsType() { + return Type.STRING; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + // no-op + } + + @Override + public void dump(final org.exist.xquery.util.ExpressionDumper dumper) { + dumper.display("[csv-get]"); + } + + @Override + public String toString() { + return "[csv-get]"; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvParser.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvParser.java new file mode 100644 index 00000000000..3b1524108bb --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvParser.java @@ -0,0 +1,338 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Expression; +import org.exist.xquery.XPathException; + +import java.util.ArrayList; +import java.util.List; + +/** + * State-machine CSV parser following the XQuery 4.0 specification. + * Parses CSV text into records (rows) of fields using SAX-like callbacks. + * + * Options supported (per XQ4 spec): + * - field-delimiter (default: comma) + * - row-delimiter (default: CRLF/LF/CR) + * - quote-character (default: double-quote; empty string disables quoting) + * - trim-whitespace (default: false) + * - header (default: false; true or "present" means first row is header) + * - select-columns (default: all) + * - trim-rows (default: false; removes trailing empty rows) + */ +public class CsvParser { + + /** + * Callback interface for CSV parsing events. + */ + public interface CsvConverter { + void header(List fields) throws XPathException; + void record(List fields) throws XPathException; + void finish() throws XPathException; + } + + private final int fieldDelimiter; + private final int rowDelimiter; + private final int quoteChar; + private final boolean trimWhitespace; + private final boolean hasHeader; + private final int[] selectColumns; + private final boolean trimRows; + private final Expression expression; + + public CsvParser(final CsvOptions options, final Expression expression) { + this.fieldDelimiter = options.fieldDelimiter; + this.rowDelimiter = options.rowDelimiter; + this.quoteChar = options.quoteChar; + this.trimWhitespace = options.trimWhitespace; + this.hasHeader = options.hasHeader; + this.selectColumns = options.selectColumns; + this.trimRows = options.trimRows; + this.expression = expression; + } + + /** + * Parse CSV text, calling the converter for each record. + */ + public void parse(final String input, final CsvConverter converter) throws XPathException { + final List> allRecords = new ArrayList<>(); + List currentRecord = new ArrayList<>(); + final StringBuilder field = new StringBuilder(); + + // State: FIELD_START, IN_UNQUOTED, IN_QUOTED, AFTER_QUOTED + int state = 0; // 0=field_start, 1=in_unquoted, 2=in_quoted, 3=after_quoted + int i = 0; + final int len = input.length(); + + while (i < len) { + final int cp = input.codePointAt(i); + final int cpLen = Character.charCount(cp); + + switch (state) { + case 0: // FIELD_START — beginning of a new field + if (cp == quoteChar && quoteChar != -1) { + state = 2; // start quoted field + i += cpLen; + } else if (cp == fieldDelimiter) { + currentRecord.add(finishField(field)); + field.setLength(0); + // remain in FIELD_START + i += cpLen; + } else if (isRowDelimiter(cp)) { + currentRecord.add(finishField(field)); + field.setLength(0); + allRecords.add(currentRecord); + currentRecord = new ArrayList<>(); + i += rowDelimiterLength(input, i, cp); + } else { + field.appendCodePoint(cp); + state = 1; // in unquoted field + i += cpLen; + } + break; + + case 1: // IN_UNQUOTED — inside an unquoted field + if (cp == quoteChar && quoteChar != -1) { + // Quote in middle of unquoted field → error + throw new XPathException(expression, ErrorCodes.FOCV0001, + "Quote character found in middle of unquoted field"); + } else if (cp == fieldDelimiter) { + currentRecord.add(finishField(field)); + field.setLength(0); + state = 0; + i += cpLen; + } else if (isRowDelimiter(cp)) { + currentRecord.add(finishField(field)); + field.setLength(0); + allRecords.add(currentRecord); + currentRecord = new ArrayList<>(); + state = 0; + i += rowDelimiterLength(input, i, cp); + } else { + field.appendCodePoint(cp); + i += cpLen; + } + break; + + case 2: // IN_QUOTED — inside a quoted field + if (cp == quoteChar) { + // Check for escaped quote (doubled) + if (i + cpLen < len && input.codePointAt(i + cpLen) == quoteChar) { + field.appendCodePoint(quoteChar); + i += cpLen * 2; + } else { + // End of quoted field + state = 3; // after closing quote + i += cpLen; + } + } else { + field.appendCodePoint(cp); + i += cpLen; + } + break; + + case 3: // AFTER_QUOTED — just saw closing quote + if (cp == fieldDelimiter) { + currentRecord.add(finishField(field)); + field.setLength(0); + state = 0; + i += cpLen; + } else if (isRowDelimiter(cp)) { + currentRecord.add(finishField(field)); + field.setLength(0); + allRecords.add(currentRecord); + currentRecord = new ArrayList<>(); + state = 0; + i += rowDelimiterLength(input, i, cp); + } else if (cp == ' ' || cp == '\t') { + // Whitespace after closing quote is allowed (ignored) + i += cpLen; + } else { + // Non-delimiter content after closing quote → error + throw new XPathException(expression, ErrorCodes.FOCV0001, + "Content after closing quote in CSV field"); + } + break; + } + } + + // Check for unterminated quotes + if (state == 2) { + throw new XPathException(expression, ErrorCodes.FOCV0001, + "Unterminated quoted field in CSV input"); + } + + // Handle last field/record (if input doesn't end with row delimiter). + // A trailing row delimiter does not create an additional empty record. + // With trim-whitespace, a trailing row delimiter followed by only whitespace + // also does not create an additional record. + if (!currentRecord.isEmpty() || state == 3) { + // We had field delimiters on this line or a quoted field — always add + currentRecord.add(finishField(field)); + allRecords.add(currentRecord); + } else if (field.length() > 0) { + final String finished = finishField(field); + if (!finished.isEmpty()) { + currentRecord.add(finished); + allRecords.add(currentRecord); + } + } + + // Trim trailing empty rows if requested + if (trimRows) { + while (!allRecords.isEmpty()) { + final List lastRow = allRecords.get(allRecords.size() - 1); + if (isEmptyRow(lastRow)) { + allRecords.remove(allRecords.size() - 1); + } else { + break; + } + } + + // Normalize column count: all rows trimmed/padded to match first row (or header) + if (!allRecords.isEmpty()) { + final int columnCount = allRecords.get(0).size(); + for (int r = 1; r < allRecords.size(); r++) { + final List row = allRecords.get(r); + if (row.size() > columnCount) { + allRecords.set(r, new ArrayList<>(row.subList(0, columnCount))); + } else { + while (row.size() < columnCount) { + row.add(""); + } + } + } + } + } + + // Process header and records + int startIdx = 0; + if (hasHeader && !allRecords.isEmpty()) { + // Headers are always trimmed (per XQ4 spec), regardless of trim-whitespace option + final List headerFields = allRecords.get(0); + final List trimmedHeader = new ArrayList<>(headerFields.size()); + for (final String h : headerFields) { + trimmedHeader.add(h.trim()); + } + converter.header(selectFields(trimmedHeader)); + startIdx = 1; + } + + for (int r = startIdx; r < allRecords.size(); r++) { + converter.record(selectFields(allRecords.get(r))); + } + + converter.finish(); + } + + private String finishField(final StringBuilder field) { + if (trimWhitespace) { + return field.toString().trim(); + } + return field.toString(); + } + + private boolean isRowDelimiter(final int cp) { + if (rowDelimiter == -1) { + // Auto-detect: CR, LF, or CRLF + return cp == '\n' || cp == '\r'; + } + return cp == rowDelimiter; + } + + private int rowDelimiterLength(final String input, final int pos, final int cp) { + if (rowDelimiter == -1) { + // Auto-detect: CRLF counts as one delimiter + if (cp == '\r' && pos + 1 < input.length() && input.charAt(pos + 1) == '\n') { + return 2; + } + return 1; + } + return Character.charCount(rowDelimiter); + } + + private List selectFields(final List fields) { + if (selectColumns == null) { + return fields; + } + final List selected = new ArrayList<>(selectColumns.length); + for (final int col : selectColumns) { + if (col >= 1 && col <= fields.size()) { + selected.add(fields.get(col - 1)); + } else { + selected.add(""); + } + } + return selected; + } + + private static boolean isEmptyRow(final List row) { + for (final String field : row) { + if (!field.isEmpty()) { + return false; + } + } + return true; + } + + /** + * Parsed CSV options from an XQuery map. + */ + public static class CsvOptions { + public int fieldDelimiter = ','; + public int rowDelimiter = -1; // -1 = auto-detect (CR/LF/CRLF) + public int quoteChar = '"'; + public boolean trimWhitespace = false; + public boolean hasHeader = false; + public List explicitHeader = null; // explicit column names from options + public int[] selectColumns = null; + public boolean trimRows = false; + + /** + * Validate options per the XQ4 spec. + */ + public void validate(final Expression expression) throws XPathException { + // Field delimiter and quote character must be different + if (quoteChar != -1 && fieldDelimiter == quoteChar) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Field delimiter and quote character must be different"); + } + // Field delimiter and row delimiter must be different + if (rowDelimiter != -1 && fieldDelimiter == rowDelimiter) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Field delimiter and row delimiter must be different"); + } + // When using auto-detect row delimiters, field delimiter can't be CR or LF + if (rowDelimiter == -1 && (fieldDelimiter == '\n' || fieldDelimiter == '\r')) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Field delimiter conflicts with auto-detected row delimiter (CR/LF)"); + } + // Quote character and row delimiter must be different + if (quoteChar != -1 && rowDelimiter != -1 && quoteChar == rowDelimiter) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Quote character and row delimiter must be different"); + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/DeepEqualOptions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/DeepEqualOptions.java new file mode 100644 index 00000000000..2db251233bb --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/DeepEqualOptions.java @@ -0,0 +1,961 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import io.lacuna.bifurcan.IEntry; +import org.exist.Namespaces; +import org.exist.dom.memtree.NodeImpl; +import org.exist.dom.memtree.ReferenceNode; +import org.exist.xquery.Constants; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.InlineFunction; +import org.exist.xquery.ValueComparison; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import javax.annotation.Nullable; +import java.text.Normalizer; +import java.util.*; + +/** + * XQuery 4.0 deep-equal options and options-aware comparison engine. + * + * Holds the parsed option flags from the options map parameter and provides + * comparison methods that respect those options. + */ +public class DeepEqualOptions { + + // Valid boolean option keys (no namespace) + private static final Set VALID_BOOLEAN_OPTIONS = Set.of( + "base-uri", "comments", "debug", + "id-property", "idrefs-property", + "in-scope-namespaces", "namespace-prefixes", "nilled-property", + "processing-instructions", "timezones", "type-annotations", + "type-variety", "typed-values" + ); + + // Valid string-valued option keys + private static final Set VALID_STRING_OPTIONS = Set.of( + "collation", "whitespace" + ); + + // Valid boolean-valued option keys (not in VALID_BOOLEAN_OPTIONS) + private static final Set VALID_ORDERED_OPTIONS = Set.of( + "ordered", "map-order" + ); + + // All valid string keys (no namespace) + private static final Set ALL_VALID_KEYS; + static { + final Set keys = new HashSet<>(); + keys.addAll(VALID_BOOLEAN_OPTIONS); + keys.addAll(VALID_STRING_OPTIONS); + keys.addAll(VALID_ORDERED_OPTIONS); + ALL_VALID_KEYS = Collections.unmodifiableSet(keys); + } + + // Option flags (defaults per XQ4 spec) + public final boolean comments; // default: false + public final boolean processingInstructions; // default: false + public final boolean ordered; // default: true + public final boolean namespacePrefixes; // default: false + public final boolean inScopeNamespaces; // default: false + public final boolean baseUri; // default: false + public final boolean idProperty; // default: false + public final boolean idrefsProperty; // default: false + public final boolean nilledProperty; // default: false + public final boolean timezones; // default: true + public final boolean typeAnnotations; // default: false + public final boolean typeVariety; // default: false + public final boolean typedValues; // default: true + public final boolean debug; // default: false + public final boolean mapOrder; // default: false + public final boolean unorderedElements; // from 'ordered' key on element comparison + + public enum WhitespaceMode { PRESERVE, NORMALIZE, STRIP } + public final WhitespaceMode whitespace; // default: PRESERVE + + @Nullable + public final Collator collator; + + /** Default options (XQ3.1 compatible behavior). */ + public static final DeepEqualOptions DEFAULTS = new DeepEqualOptions( + false, false, true, false, false, false, + false, false, false, true, false, false, true, + false, false, WhitespaceMode.PRESERVE, null + ); + + private DeepEqualOptions( + boolean comments, boolean processingInstructions, boolean ordered, + boolean namespacePrefixes, boolean inScopeNamespaces, boolean baseUri, + boolean idProperty, boolean idrefsProperty, boolean nilledProperty, + boolean timezones, boolean typeAnnotations, boolean typeVariety, + boolean typedValues, boolean debug, boolean mapOrder, + WhitespaceMode whitespace, @Nullable Collator collator) { + this.comments = comments; + this.processingInstructions = processingInstructions; + this.ordered = ordered; + this.namespacePrefixes = namespacePrefixes; + this.inScopeNamespaces = inScopeNamespaces; + this.baseUri = baseUri; + this.idProperty = idProperty; + this.idrefsProperty = idrefsProperty; + this.nilledProperty = nilledProperty; + this.timezones = timezones; + this.typeAnnotations = typeAnnotations; + this.typeVariety = typeVariety; + this.typedValues = typedValues; + this.debug = debug; + this.mapOrder = mapOrder; + this.unorderedElements = !ordered; + this.whitespace = whitespace; + this.collator = collator; + } + + /** + * Parse an XQ4 options map into a DeepEqualOptions instance. + * Validates all option keys and values per the spec. + * + * @param options the options map + * @param context the XQuery context (for collation resolution) + * @return parsed options + * @throws XPathException XPTY0004 if any option key or value is invalid + */ + public static DeepEqualOptions parse(final AbstractMapType options, final XQueryContext context) throws XPathException { + boolean comments = false; + boolean processingInstructions = false; + boolean ordered = true; + boolean namespacePrefixes = false; + boolean inScopeNamespaces = false; + boolean baseUri = false; + boolean idProperty = false; + boolean idrefsProperty = false; + boolean nilledProperty = false; + boolean timezones = true; + boolean typeAnnotations = false; + boolean typeVariety = false; + boolean typedValues = true; + boolean debug = false; + boolean mapOrder = false; + WhitespaceMode whitespace = WhitespaceMode.PRESERVE; + Collator collator = context.getDefaultCollator(); + + for (final IEntry entry : options) { + final AtomicValue key = entry.key(); + + // Keys that are QNames in a namespace are ignored (vendor extensions) + if (key.getType() == Type.QNAME) { + final QNameValue qnv = (QNameValue) key; + final String ns = qnv.getQName().getNamespaceURI(); + if (ns != null && !ns.isEmpty()) { + continue; // Ignore vendor extension options + } + // QName in no namespace → error + throw new XPathException(ErrorCodes.XPTY0004, + "Option key in no namespace is not recognized: " + key.getStringValue()); + } + + final String keyStr = key.getStringValue(); + + // Validate that the key is known + if (!ALL_VALID_KEYS.contains(keyStr)) { + throw new XPathException(ErrorCodes.XPTY0004, + "Unknown deep-equal option: '" + keyStr + "'"); + } + + final Sequence value = entry.value(); + + if (VALID_BOOLEAN_OPTIONS.contains(keyStr)) { + final boolean boolVal = parseBooleanOption(keyStr, value); + switch (keyStr) { + case "comments" -> comments = boolVal; + case "processing-instructions" -> processingInstructions = boolVal; + case "namespace-prefixes" -> namespacePrefixes = boolVal; + case "in-scope-namespaces" -> inScopeNamespaces = boolVal; + case "base-uri" -> baseUri = boolVal; + case "id-property" -> idProperty = boolVal; + case "idrefs-property" -> idrefsProperty = boolVal; + case "nilled-property" -> nilledProperty = boolVal; + case "timezones" -> timezones = boolVal; + case "type-annotations" -> typeAnnotations = boolVal; + case "type-variety" -> typeVariety = boolVal; + case "typed-values" -> typedValues = boolVal; + case "debug" -> debug = boolVal; + } + } else if (VALID_ORDERED_OPTIONS.contains(keyStr)) { + final boolean boolVal = parseBooleanOption(keyStr, value); + switch (keyStr) { + case "ordered" -> ordered = boolVal; + case "map-order" -> mapOrder = boolVal; + } + } else if ("collation".equals(keyStr)) { + if (!value.isEmpty()) { + collator = context.getCollator(value.getStringValue()); + } + } else if ("whitespace".equals(keyStr)) { + if (!value.isEmpty()) { + final String wsVal = value.getStringValue(); + whitespace = switch (wsVal) { + case "preserve" -> WhitespaceMode.PRESERVE; + case "normalize" -> WhitespaceMode.NORMALIZE; + case "strip" -> WhitespaceMode.STRIP; + default -> throw new XPathException(ErrorCodes.XPTY0004, + "Invalid whitespace option value: '" + wsVal + "'"); + }; + } + } + } + + return new DeepEqualOptions( + comments, processingInstructions, ordered, + namespacePrefixes, inScopeNamespaces, baseUri, + idProperty, idrefsProperty, nilledProperty, + timezones, typeAnnotations, typeVariety, typedValues, + debug, mapOrder, whitespace, collator + ); + } + + /** + * Parse a boolean option value using XQ4 option parameter conventions. + * Accepts: xs:boolean, xs:string ("true"/"false"/"0"/"1"), + * xs:integer (0/1), or nodes (effective boolean value). + */ + private static boolean parseBooleanOption(final String key, final Sequence value) throws XPathException { + if (value.isEmpty()) { + return false; + } + + final Item item = value.itemAt(0); + + // If it's already a boolean, use it directly + if (item.getType() == Type.BOOLEAN) { + return ((BooleanValue) item).getValue(); + } + + // Try casting to xs:boolean — accepts "true"/"false"/"0"/"1" and numeric 0/1 + try { + final AtomicValue boolVal = item.atomize().convertTo(Type.BOOLEAN); + return ((BooleanValue) boolVal).getValue(); + } catch (final XPathException e) { + throw new XPathException(ErrorCodes.XPTY0004, + "Invalid value for boolean option '" + key + "': " + item.getStringValue()); + } + } + + // ======================================================================== + // Options-aware deep comparison engine + // ======================================================================== + + /** + * Deep-compare two sequences with options. + */ + public int deepCompareSeq(final Sequence sequence1, final Sequence sequence2) { + if (sequence1 == sequence2) { + return Constants.EQUAL; + } + + if (!ordered) { + return deepCompareSeqUnordered(sequence1, sequence2); + } + + final int count1 = sequence1.getItemCount(); + final int count2 = sequence2.getItemCount(); + if (count1 != count2) { + return count1 < count2 ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (int i = 0; i < count1; i++) { + final int cmp = deepCompare(sequence1.itemAt(i), sequence2.itemAt(i)); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + /** + * Unordered sequence comparison: every item in seq1 must match some + * item in seq2 (and vice versa, by equal counts of matches). + */ + private int deepCompareSeqUnordered(final Sequence sequence1, final Sequence sequence2) { + final int count1 = sequence1.getItemCount(); + final int count2 = sequence2.getItemCount(); + if (count1 != count2) { + return count1 < count2 ? Constants.INFERIOR : Constants.SUPERIOR; + } + + // For each item in seq1, find a matching item in seq2 + final boolean[] matched = new boolean[count2]; + for (int i = 0; i < count1; i++) { + final Item item1 = sequence1.itemAt(i); + boolean found = false; + for (int j = 0; j < count2; j++) { + if (!matched[j] && deepCompare(item1, sequence2.itemAt(j)) == Constants.EQUAL) { + matched[j] = true; + found = true; + break; + } + } + if (!found) { + return Constants.INFERIOR; + } + } + return Constants.EQUAL; + } + + /** + * Deep-compare two items with options. + */ + public int deepCompare(final Item item1, final Item item2) { + if (item1 == item2) { + return Constants.EQUAL; + } + + try { + // Array comparison + if (item1.getType() == Type.ARRAY_ITEM || item2.getType() == Type.ARRAY_ITEM) { + if (item1.getType() != item2.getType()) { + return Constants.INFERIOR; + } + final ArrayType array1 = (ArrayType) item1; + final ArrayType array2 = (ArrayType) item2; + if (array1.getSize() != array2.getSize()) { + return array1.getSize() < array2.getSize() ? Constants.INFERIOR : Constants.SUPERIOR; + } + for (int i = 0; i < array1.getSize(); i++) { + final int cmp = deepCompareSeq(array1.get(i), array2.get(i)); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + // Map comparison + if (item1.getType() == Type.MAP_ITEM || item2.getType() == Type.MAP_ITEM) { + if (item1.getType() != item2.getType()) { + return Constants.INFERIOR; + } + return compareMaps((AbstractMapType) item1, (AbstractMapType) item2); + } + + // Function items: identity comparison via function-identity semantics + if (Type.subTypeOf(item1.getType(), Type.FUNCTION) || Type.subTypeOf(item2.getType(), Type.FUNCTION)) { + if (!Type.subTypeOf(item1.getType(), Type.FUNCTION) || !Type.subTypeOf(item2.getType(), Type.FUNCTION)) { + return Constants.INFERIOR; + } + return compareFunctionItems(item1, item2); + } + + // Atomic values + final boolean item1IsAtomic = Type.subTypeOf(item1.getType(), Type.ANY_ATOMIC_TYPE); + final boolean item2IsAtomic = Type.subTypeOf(item2.getType(), Type.ANY_ATOMIC_TYPE); + if (item1IsAtomic || item2IsAtomic) { + if (!item1IsAtomic || !item2IsAtomic) { + return item1IsAtomic ? Constants.INFERIOR : Constants.SUPERIOR; + } + return compareAtomics((AtomicValue) item1, (AtomicValue) item2); + } + + // Node comparison + if (item1.getType() != item2.getType()) { + return Constants.INFERIOR; + } + + final NodeValue nva = (NodeValue) item1; + final NodeValue nvb = (NodeValue) item2; + if (nva == nvb) { + return Constants.EQUAL; + } + + switch (item1.getType()) { + case Type.DOCUMENT: + return compareContents( + nva instanceof org.w3c.dom.Node n1 ? n1 : ((org.exist.dom.persistent.NodeProxy) nva).getOwnerDocument(), + nvb instanceof org.w3c.dom.Node n2 ? n2 : ((org.exist.dom.persistent.NodeProxy) nvb).getOwnerDocument()); + + case Type.ELEMENT: + return compareElements(nva.getNode(), nvb.getNode()); + + case Type.ATTRIBUTE: + final int attrNameCmp = compareNames(nva.getNode(), nvb.getNode()); + if (attrNameCmp != Constants.EQUAL) { + return attrNameCmp; + } + // whitespace:normalize applies to attribute values, but strip does NOT + return safeCompare( + maybeNormalizeWSAttr(nva.getNode().getNodeValue()), + maybeNormalizeWSAttr(nvb.getNode().getNodeValue()), + collator); + + case Type.PROCESSING_INSTRUCTION: + return comparePIs(nva, nvb); + + case Type.NAMESPACE: + final int nsNameCmp = safeCompare(nva.getNode().getNodeName(), nvb.getNode().getNodeName(), null); + if (nsNameCmp != Constants.EQUAL) { + return nsNameCmp; + } + return safeCompare(nva.getStringValue(), nvb.getStringValue(), collator); + + case Type.TEXT: + return safeCompare( + maybeNormalizeWS(nva.getStringValue()), + maybeNormalizeWS(nvb.getStringValue()), + collator); + + case Type.COMMENT: + // Apply whitespace normalization to comment content if whitespace option is set + return safeCompare( + maybeNormalizeWS(nva.getStringValue()), + maybeNormalizeWS(nvb.getStringValue()), + collator); + + default: + return Constants.INFERIOR; + } + } catch (final XPathException e) { + return Constants.INFERIOR; + } + } + + /** + * Compare function items using function-identity semantics (XQ4). + * Named functions with same name and arity are equal. + * Anonymous functions use reference identity. + */ + private static int compareFunctionItems(final Item item1, final Item item2) { + if (item1 == item2) { + return Constants.EQUAL; + } + if (item1 instanceof FunctionReference ref1 && item2 instanceof FunctionReference ref2) { + final FunctionSignature sig1 = ref1.getSignature(); + final FunctionSignature sig2 = ref2.getSignature(); + final org.exist.dom.QName name1 = sig1.getName(); + final org.exist.dom.QName name2 = sig2.getName(); + // Both must be named functions (not inline/anonymous) + if (name1 != null && name2 != null + && name1 != InlineFunction.INLINE_FUNCTION_QNAME + && name2 != InlineFunction.INLINE_FUNCTION_QNAME) { + if (name1.equals(name2) && sig1.getArgumentCount() == sig2.getArgumentCount()) { + return Constants.EQUAL; + } + } + } + return Constants.INFERIOR; + } + + private int compareMaps(final AbstractMapType map1, final AbstractMapType map2) { + if (map1.size() != map2.size()) { + return map1.size() < map2.size() ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (final IEntry entry1 : map1) { + if (!map2.contains(entry1.key())) { + return Constants.SUPERIOR; + } + final int cmp = deepCompareSeq(entry1.value(), map2.get(entry1.key())); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + private int compareAtomics(final AtomicValue av, final AtomicValue bv) { + try { + // Whitespace normalization for string-like atomics + if (whitespace != WhitespaceMode.PRESERVE) { + if (isStringLike(av) && isStringLike(bv)) { + final String a = applyWhitespace(av.getStringValue()); + final String b = applyWhitespace(bv.getStringValue()); + if (collator != null) { + return collator.compare(a, b); + } + return a.compareTo(b); + } + } + + if (Type.subTypeOfUnion(av.getType(), Type.NUMERIC) && + Type.subTypeOfUnion(bv.getType(), Type.NUMERIC)) { + if (((NumericValue) av).isNaN() && ((NumericValue) bv).isNaN()) { + return Constants.EQUAL; + } + } + return ValueComparison.compareAtomic(collator, av, bv); + } catch (final XPathException e) { + return Constants.INFERIOR; + } + } + + private static boolean isStringLike(final AtomicValue v) { + return Type.subTypeOf(v.getType(), Type.STRING) || + v.getType() == Type.UNTYPED_ATOMIC || + v.getType() == Type.ANY_URI; + } + + private int compareElements(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + int cmp = compareNames(a, b); + if (cmp != Constants.EQUAL) { + return cmp; + } + + // Compare namespace prefixes if option is set + if (namespacePrefixes) { + cmp = safeCompare(a.getPrefix(), b.getPrefix(), null); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + + cmp = compareAttributes(a, b); + if (cmp != Constants.EQUAL) { + return cmp; + } + + if (unorderedElements) { + return compareContentsUnordered(a, b); + } + + return compareContents(a, b); + } + + private int comparePIs(final NodeValue nva, final NodeValue nvb) throws XPathException { + final int nameCmp = safeCompare(nva.getNode().getNodeName(), nvb.getNode().getNodeName(), null); + if (nameCmp != Constants.EQUAL) { + return nameCmp; + } + // Apply whitespace normalization to PI data content + return safeCompare( + maybeNormalizeWS(nva.getStringValue()), + maybeNormalizeWS(nvb.getStringValue()), + collator); + } + + private int compareContents(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + final List childrenA = getSignificantChildren(a); + final List childrenB = getSignificantChildren(b); + + // Merge adjacent text nodes + final List mergedA = mergeAdjacentTextNodes(childrenA); + final List mergedB = mergeAdjacentTextNodes(childrenB); + + if (mergedA.size() != mergedB.size()) { + return mergedA.size() < mergedB.size() ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (int i = 0; i < mergedA.size(); i++) { + final Object itemA = mergedA.get(i); + final Object itemB = mergedB.get(i); + + if (itemA instanceof String sa && itemB instanceof String sb) { + // Text may already be normalized/stripped by addMergedText; apply WS normalization if PRESERVE mode + final String normA = whitespace == WhitespaceMode.PRESERVE ? sa : maybeNormalizeWS(sa); + final String normB = whitespace == WhitespaceMode.PRESERVE ? sb : maybeNormalizeWS(sb); + final int cmp = safeCompare(normA, normB, collator); + if (cmp != Constants.EQUAL) { + return cmp; + } + } else if (itemA instanceof org.w3c.dom.Node na && itemB instanceof org.w3c.dom.Node nb) { + final int typeA = getEffectiveNodeType(na); + final int typeB = getEffectiveNodeType(nb); + if (typeA != typeB) { + return Constants.INFERIOR; + } + final int cmp; + switch (typeA) { + case org.w3c.dom.Node.ELEMENT_NODE: + cmp = compareElements(na, nb); + break; + case org.w3c.dom.Node.COMMENT_NODE: + cmp = safeCompare(maybeNormalizeWS(na.getNodeValue()), + maybeNormalizeWS(nb.getNodeValue()), collator); + break; + case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: + final int piNameCmp = safeCompare(na.getNodeName(), nb.getNodeName(), null); + cmp = piNameCmp != Constants.EQUAL ? piNameCmp : + safeCompare(maybeNormalizeWS(na.getNodeValue()), + maybeNormalizeWS(nb.getNodeValue()), collator); + break; + default: + cmp = Constants.INFERIOR; + } + if (cmp != Constants.EQUAL) { + return cmp; + } + } else { + // Mismatched types (text vs node) + return Constants.INFERIOR; + } + } + return Constants.EQUAL; + } + + /** + * Unordered element comparison: child elements are compared as multisets. + */ + private int compareContentsUnordered(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + final List childrenA = getSignificantChildren(a); + final List childrenB = getSignificantChildren(b); + + // Separate text content and element/other nodes + final StringBuilder textA = new StringBuilder(); + final List elementsA = new ArrayList<>(); + for (final org.w3c.dom.Node n : childrenA) { + final int type = getEffectiveNodeType(n); + if (type == org.w3c.dom.Node.TEXT_NODE) { + textA.append(getNodeValue(n)); + } else { + elementsA.add(n); + } + } + + final StringBuilder textB = new StringBuilder(); + final List elementsB = new ArrayList<>(); + for (final org.w3c.dom.Node n : childrenB) { + final int type = getEffectiveNodeType(n); + if (type == org.w3c.dom.Node.TEXT_NODE) { + textB.append(getNodeValue(n)); + } else { + elementsB.add(n); + } + } + + // Compare concatenated text content + final int textCmp = safeCompare( + maybeNormalizeWS(textA.toString()), + maybeNormalizeWS(textB.toString()), + collator); + if (textCmp != Constants.EQUAL) { + return textCmp; + } + + // Compare elements as multisets + if (elementsA.size() != elementsB.size()) { + return elementsA.size() < elementsB.size() ? Constants.INFERIOR : Constants.SUPERIOR; + } + + final boolean[] matched = new boolean[elementsB.size()]; + for (final org.w3c.dom.Node na : elementsA) { + boolean found = false; + for (int j = 0; j < elementsB.size(); j++) { + if (!matched[j]) { + final int typeA = getEffectiveNodeType(na); + final int typeB = getEffectiveNodeType(elementsB.get(j)); + if (typeA == typeB) { + int cmp; + if (typeA == org.w3c.dom.Node.ELEMENT_NODE) { + cmp = compareElements(na, elementsB.get(j)); + } else if (typeA == org.w3c.dom.Node.COMMENT_NODE) { + cmp = safeCompare(na.getNodeValue(), elementsB.get(j).getNodeValue(), collator); + } else if (typeA == org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE) { + cmp = safeCompare(na.getNodeName(), elementsB.get(j).getNodeName(), null); + if (cmp == Constants.EQUAL) { + cmp = safeCompare(na.getNodeValue(), elementsB.get(j).getNodeValue(), collator); + } + } else { + cmp = Constants.INFERIOR; + } + if (cmp == Constants.EQUAL) { + matched[j] = true; + found = true; + break; + } + } + } + } + if (!found) { + return Constants.INFERIOR; + } + } + return Constants.EQUAL; + } + + /** + * Get child nodes that are significant for deep-equal comparison, + * based on the current options. + */ + private List getSignificantChildren(final org.w3c.dom.Node parent) { + final List result = new ArrayList<>(); + final boolean preserveWS = isXmlSpacePreserve(parent); + org.w3c.dom.Node child = parent.getFirstChild(); + while (child != null) { + final int type = getEffectiveNodeType(child); + switch (type) { + case org.w3c.dom.Node.ELEMENT_NODE: + result.add(child); + break; + case org.w3c.dom.Node.TEXT_NODE: + if (whitespace == WhitespaceMode.STRIP && !preserveWS) { + // Strip whitespace-only text nodes (unless xml:space="preserve") + final String value = getNodeValue(child); + if (value != null && !value.trim().isEmpty()) { + result.add(child); + } + } else { + result.add(child); + } + break; + case org.w3c.dom.Node.COMMENT_NODE: + if (comments) { + result.add(child); + } + break; + case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: + if (processingInstructions) { + result.add(child); + } + break; + } + child = child.getNextSibling(); + } + return result; + } + + /** + * Merge adjacent text nodes into single String entries. + * Non-text nodes are kept as-is. This handles the case where + * comments/PIs split text differently in two trees. + */ + private List mergeAdjacentTextNodes(final List nodes) { + final List result = new ArrayList<>(); + StringBuilder currentText = null; + + for (final org.w3c.dom.Node node : nodes) { + final int type = getEffectiveNodeType(node); + if (type == org.w3c.dom.Node.TEXT_NODE) { + if (currentText == null) { + currentText = new StringBuilder(); + } + currentText.append(getNodeValue(node)); + } else { + if (currentText != null) { + addMergedText(result, currentText.toString()); + currentText = null; + } + result.add(node); + } + } + if (currentText != null) { + addMergedText(result, currentText.toString()); + } + return result; + } + + /** + * Add merged text to the result list, applying whitespace rules. + * In STRIP mode, whitespace-only text is dropped. + * In NORMALIZE mode, text that normalizes to empty is dropped. + */ + private void addMergedText(final List result, final String text) { + if (whitespace == WhitespaceMode.STRIP) { + if (!text.trim().isEmpty()) { + result.add(text); + } + } else if (whitespace == WhitespaceMode.NORMALIZE) { + final String normalized = normalizeWhitespace(text); + if (!normalized.isEmpty()) { + result.add(normalized); + } + } else { + result.add(text); + } + } + + private int compareAttributes(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + final org.w3c.dom.NamedNodeMap nnma = a.getAttributes(); + final org.w3c.dom.NamedNodeMap nnmb = b.getAttributes(); + + final int aCount = getAttrCount(nnma); + final int bCount = getAttrCount(nnmb); + + if (aCount != bCount) { + return aCount < bCount ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (int i = 0; i < nnma.getLength(); i++) { + final org.w3c.dom.Node ta = nnma.item(i); + final String nsA = ta.getNamespaceURI(); + if (nsA != null && Namespaces.XMLNS_NS.equals(nsA)) { + continue; + } + final org.w3c.dom.Node tb = ta.getLocalName() == null ? + nnmb.getNamedItem(ta.getNodeName()) : + nnmb.getNamedItemNS(ta.getNamespaceURI(), ta.getLocalName()); + if (tb == null) { + return Constants.SUPERIOR; + } + final int cmp = safeCompare( + maybeNormalizeWSAttr(ta.getNodeValue()), + maybeNormalizeWSAttr(tb.getNodeValue()), + collator); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + // ======================================================================== + // Utility methods + // ======================================================================== + + private String maybeNormalizeWS(@Nullable final String s) { + if (s == null || whitespace == WhitespaceMode.PRESERVE) { + return s; + } + // Both NORMALIZE and STRIP normalize text content + return normalizeWhitespace(s); + } + + /** + * Normalize whitespace for attribute values: only NORMALIZE mode applies, + * STRIP mode does NOT affect attribute values. + */ + private String maybeNormalizeWSAttr(@Nullable final String s) { + if (s == null || whitespace != WhitespaceMode.NORMALIZE) { + return s; + } + return normalizeWhitespace(s); + } + + private static String normalizeWhitespace(final String s) { + return s.strip().replaceAll("\\s+", " "); + } + + private String applyWhitespace(final String s) { + if (whitespace == WhitespaceMode.NORMALIZE) { + return normalizeWhitespace(s); + } + if (whitespace == WhitespaceMode.STRIP) { + return normalizeWhitespace(s); + } + return s; + } + + private static int getAttrCount(final org.w3c.dom.NamedNodeMap nnm) { + int count = 0; + for (int i = 0; i < nnm.getLength(); i++) { + final org.w3c.dom.Node n = nnm.item(i); + final String ns = n.getNamespaceURI(); + if (ns == null || !Namespaces.XMLNS_NS.equals(ns)) { + ++count; + } + } + return count; + } + + private static int compareNames(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + if (a.getLocalName() != null || b.getLocalName() != null) { + final int nsComparison = safeCompare(a.getNamespaceURI(), b.getNamespaceURI(), null); + if (nsComparison != Constants.EQUAL) { + return nsComparison; + } + return safeCompare(a.getLocalName(), b.getLocalName(), null); + } + return safeCompare(a.getNodeName(), b.getNodeName(), null); + } + + private static int safeCompare(@Nullable final String a, @Nullable final String b, @Nullable final Collator collator) { + if (a == b) { + return Constants.EQUAL; + } + if (a == null) { + return Constants.INFERIOR; + } + if (b == null) { + return Constants.SUPERIOR; + } + if (collator != null) { + return collator.compare(a, b); + } + return a.compareTo(b); + } + + private static String getNodeValue(final org.w3c.dom.Node n) { + if (n.getNodeType() == NodeImpl.REFERENCE_NODE) { + return ((ReferenceNode) n).getReference().getNodeValue(); + } + return n.getNodeValue(); + } + + private static int getEffectiveNodeType(final org.w3c.dom.Node n) { + int nodeType = n.getNodeType(); + if (nodeType == NodeImpl.REFERENCE_NODE) { + nodeType = ((ReferenceNode) n).getReference().getNode().getNodeType(); + } + return nodeType; + } + + /** + * Check if the given node or any ancestor has xml:space="preserve". + * Uses NamedNodeMap lookup for broader DOM compatibility. + */ + private static boolean isXmlSpacePreserve(final org.w3c.dom.Node node) { + org.w3c.dom.Node current = node; + while (current != null && current.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + if (current instanceof org.w3c.dom.Element elem) { + // Use Element.getAttributeNS for persistent DOM and other implementations + final String xmlSpace = elem.getAttributeNS( + "http://www.w3.org/XML/1998/namespace", "space"); + if ("preserve".equals(xmlSpace)) { + return true; + } + if ("default".equals(xmlSpace)) { + return false; + } + } + // Also check via NamedNodeMap for broader compatibility + final org.w3c.dom.NamedNodeMap attrs = current.getAttributes(); + if (attrs != null) { + org.w3c.dom.Node xmlSpace = attrs.getNamedItemNS( + "http://www.w3.org/XML/1998/namespace", "space"); + if (xmlSpace == null) { + xmlSpace = attrs.getNamedItem("xml:space"); + } + if (xmlSpace != null) { + final String val = xmlSpace.getNodeValue(); + if ("preserve".equals(val)) { + return true; + } + if ("default".equals(val)) { + return false; + } + } + } + current = current.getParentNode(); + } + return false; + } + + /** + * Deep equality using these options. + */ + public boolean deepEqualsSeq(final Sequence sequence1, final Sequence sequence2) { + return deepCompareSeq(sequence1, sequence2) == Constants.EQUAL; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java index e298e084cc8..2f14003d15b 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java @@ -59,7 +59,7 @@ public class ExtCollection extends BasicFunction { arities( arity(), arity( - optParam("collection-uri", Type.STRING,"The Collection URI") + optParam("source", Type.STRING,"The Collection URI") ) ) ); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAllEqualDifferent.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAllEqualDifferent.java new file mode 100644 index 00000000000..c8825265991 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAllEqualDifferent.java @@ -0,0 +1,165 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:all-equal and fn:all-different (XQuery 4.0). + */ +public class FnAllEqualDifferent extends BasicFunction { + + public static final FunctionSignature[] FN_ALL_EQUAL = { + new FunctionSignature( + new QName("all-equal", Function.BUILTIN_FUNCTION_NS), + "Returns true if all items in the supplied sequence are equal.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are equal")), + new FunctionSignature( + new QName("all-equal", Function.BUILTIN_FUNCTION_NS), + "Returns true if all items in the supplied sequence are equal, using the specified collation.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are equal")) + }; + + public static final FunctionSignature[] FN_ALL_DIFFERENT = { + new FunctionSignature( + new QName("all-different", Function.BUILTIN_FUNCTION_NS), + "Returns true if no two items in the supplied sequence are equal.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are different")), + new FunctionSignature( + new QName("all-different", Function.BUILTIN_FUNCTION_NS), + "Returns true if no two items in the supplied sequence are equal, using the specified collation.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are different")) + }; + + public FnAllEqualDifferent(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence values = args[0]; + if (values.getItemCount() <= 1) { + return BooleanValue.TRUE; + } + + final Collator collator = getCollator(args); + + // Collect all atomized values + final java.util.List items = new java.util.ArrayList<>(values.getItemCount()); + for (final SequenceIterator i = values.iterate(); i.hasNext(); ) { + items.add(i.nextItem().atomize()); + } + + if (isCalledAs("all-equal")) { + return allEqual(items, collator); + } else { + return allDifferent(items, collator); + } + } + + private Sequence allEqual(final java.util.List items, final Collator collator) throws XPathException { + // all-equal iff count(distinct-values) <= 1, using contextual equality + final AtomicValue first = items.get(0); + for (int i = 1; i < items.size(); i++) { + if (!contextuallyEqual(first, items.get(i), collator)) { + return BooleanValue.FALSE; + } + } + return BooleanValue.TRUE; + } + + private Sequence allDifferent(final java.util.List items, final Collator collator) throws XPathException { + // all-different iff count(distinct-values) == count + for (int i = 0; i < items.size(); i++) { + for (int j = i + 1; j < items.size(); j++) { + if (contextuallyEqual(items.get(i), items.get(j), collator)) { + return BooleanValue.FALSE; + } + } + } + return BooleanValue.TRUE; + } + + /** + * XQ4 contextual equality: two values are contextually equal if fn:compare returns 0. + * NaN is treated as equal to NaN. Errors in comparison mean values are unequal. + */ + static boolean contextuallyEqual(final AtomicValue v1, final AtomicValue v2, final Collator collator) { + try { + // NaN handling: NaN equals NaN + if (v1 instanceof NumericValue && v2 instanceof NumericValue) { + final boolean nan1 = ((NumericValue) v1).isNaN(); + final boolean nan2 = ((NumericValue) v2).isNaN(); + if (nan1 && nan2) { + return true; + } + if (nan1 || nan2) { + return false; + } + } + return FunCompare.compare(v1, v2, collator) == 0; + } catch (final Exception e) { + // Errors in comparison mean values are unequal + return false; + } + } + + private Collator getCollator(final Sequence[] args) throws XPathException { + if (args.length > 1 && !args[1].isEmpty()) { + final String collationURI = args[1].getStringValue(); + return context.getCollator(collationURI, ErrorCodes.FOCH0002); + } + return context.getDefaultCollator(); + } + +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAtomicEqual.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAtomicEqual.java new file mode 100644 index 00000000000..0836fb00e66 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAtomicEqual.java @@ -0,0 +1,214 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AbstractDateTimeValue; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.BinaryValue; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.FloatValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:atomic-equal (XQuery 4.0). + * + * Compares two atomic values for equality. Unlike eq, this function: + * - Never raises a dynamic error (returns false for incomparable types) + * - NaN equals NaN + * - Does not depend on static or dynamic context + */ +public class FnAtomicEqual extends BasicFunction { + + public static final FunctionSignature FN_ATOMIC_EQUAL = new FunctionSignature( + new QName("atomic-equal", Function.BUILTIN_FUNCTION_NS), + "Compares two atomic values for equality. NaN equals NaN, and incomparable types return false.", + new SequenceType[] { + new FunctionParameterSequenceType("value1", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The first value"), + new FunctionParameterSequenceType("value2", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The second value") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if the values are equal")); + + public FnAtomicEqual(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final AtomicValue v1 = args[0].itemAt(0).atomize(); + final AtomicValue v2 = args[1].itemAt(0).atomize(); + + // Handle NaN: NaN equals NaN (across float/double) + if (isNaN(v1) && isNaN(v2)) { + return BooleanValue.TRUE; + } + if (isNaN(v1) || isNaN(v2)) { + return BooleanValue.FALSE; + } + + // Handle Infinity: float INF equals double INF (and -INF) + if (isInfinite(v1) && isInfinite(v2)) { + return BooleanValue.valueOf(toDouble(v1) == toDouble(v2)); + } + + try { + final int t1 = v1.getType(); + final int t2 = v2.getType(); + + // String-like types: string, untypedAtomic, anyURI all compare equal + if (isStringLike(t1) && isStringLike(t2)) { + return BooleanValue.valueOf(v1.getStringValue().equals(v2.getStringValue())); + } + + // Numeric: compare by mathematical value regardless of type + // Per XQ4 spec: "Two numeric values are equal if their mathematical values are equal" + if (v1 instanceof NumericValue && v2 instanceof NumericValue) { + return BooleanValue.valueOf(numericEqual((NumericValue) v1, (NumericValue) v2)); + } + + // Binary types: hexBinary and base64Binary compare equal by content + if (isBinaryType(t1) && isBinaryType(t2)) { + if (v1 instanceof BinaryValue && v2 instanceof BinaryValue) { + return BooleanValue.valueOf(v1.compareTo(null, v2) == 0); + } + return BooleanValue.FALSE; + } + + // Boolean + if (t1 == Type.BOOLEAN && t2 == Type.BOOLEAN) { + return BooleanValue.valueOf(v1.effectiveBooleanValue() == v2.effectiveBooleanValue()); + } + + // Date/time: values with timezone never equal values without timezone + if (v1 instanceof AbstractDateTimeValue && v2 instanceof AbstractDateTimeValue) { + final AbstractDateTimeValue dt1 = (AbstractDateTimeValue) v1; + final AbstractDateTimeValue dt2 = (AbstractDateTimeValue) v2; + if (dt1.hasTimezone() != dt2.hasTimezone()) { + return BooleanValue.FALSE; + } + } + + // Different base types are never equal + if (t1 != t2 && !Type.subTypeOf(t1, t2) && !Type.subTypeOf(t2, t1)) { + return BooleanValue.FALSE; + } + + // Same type — compare by value + final int cmp = v1.compareTo(null, v2); + return BooleanValue.valueOf(cmp == 0); + } catch (final XPathException | RuntimeException e) { + // Incomparable types or indeterminate ordering — return false per spec + return BooleanValue.FALSE; + } + } + + private static boolean isNaN(final AtomicValue v) { + if (v instanceof DoubleValue) { + return Double.isNaN(((DoubleValue) v).getDouble()); + } + if (v instanceof FloatValue) { + return Float.isNaN(((FloatValue) v).getValue()); + } + return false; + } + + private static boolean isInfinite(final AtomicValue v) { + if (v instanceof DoubleValue) { + return Double.isInfinite(((DoubleValue) v).getDouble()); + } + if (v instanceof FloatValue) { + return Float.isInfinite(((FloatValue) v).getValue()); + } + return false; + } + + private static double toDouble(final AtomicValue v) { + if (v instanceof DoubleValue) { + return ((DoubleValue) v).getDouble(); + } + if (v instanceof FloatValue) { + return ((FloatValue) v).getValue(); + } + return 0; + } + + static boolean numericEqual(final NumericValue v1, final NumericValue v2) throws XPathException { + // Both floating-point: use double comparison (handles 0.0 == -0.0) + if ((v1 instanceof DoubleValue || v1 instanceof FloatValue) + && (v2 instanceof DoubleValue || v2 instanceof FloatValue)) { + return v1.getDouble() == v2.getDouble(); + } + // Mixed floating-point and exact: convert to BigDecimal for exact mathematical comparison + // This handles cases like atomic-equal(16777218, xs:double("16777218")) + final java.math.BigDecimal bd1 = numericToBigDecimal(v1); + final java.math.BigDecimal bd2 = numericToBigDecimal(v2); + return bd1.compareTo(bd2) == 0; + } + + private static java.math.BigDecimal numericToBigDecimal(final NumericValue v) throws XPathException { + if (v instanceof DoubleValue) { + // Use new BigDecimal(double) for exact binary representation, + // not valueOf() which rounds via Double.toString() + return new java.math.BigDecimal(((DoubleValue) v).getDouble()); + } + if (v instanceof FloatValue) { + return new java.math.BigDecimal(((FloatValue) v).getValue()); + } + // Integer and decimal types: parse from string for exact representation + return new java.math.BigDecimal(v.getStringValue()); + } + + private static int primitiveNumericType(final int type) { + if (Type.subTypeOf(type, Type.INTEGER)) { + return Type.INTEGER; + } + if (Type.subTypeOf(type, Type.DECIMAL)) { + return Type.DECIMAL; + } + if (type == Type.FLOAT) { + return Type.FLOAT; + } + return Type.DOUBLE; + } + + private static boolean isStringLike(final int type) { + return Type.subTypeOf(type, Type.STRING) + || type == Type.UNTYPED_ATOMIC + || Type.subTypeOf(type, Type.ANY_URI); + } + + private static boolean isBinaryType(final int type) { + return type == Type.HEX_BINARY || type == Type.BASE64_BINARY; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnBuildUri.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnBuildUri.java new file mode 100644 index 00000000000..bcd257a3869 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnBuildUri.java @@ -0,0 +1,335 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +/** + * Implements fn:build-uri (XQuery 4.0). + * + * Constructs a URI from the parts provided in a map. + */ +public class FnBuildUri extends BasicFunction { + + private static final Set NON_HIERARCHICAL_SCHEMES = new HashSet<>(Arrays.asList( + "mailto", "news", "urn", "tel", "tag", "jar", "data", "javascript", "cid", "mid" + )); + + public static final FunctionSignature[] FN_BUILD_URI = { + new FunctionSignature( + new QName("build-uri", Function.BUILTIN_FUNCTION_NS), + "Constructs a URI from the parts provided.", + new SequenceType[] { + new FunctionParameterSequenceType("parts", Type.MAP_ITEM, + Cardinality.EXACTLY_ONE, "Map of URI components") + }, + new FunctionReturnSequenceType(Type.STRING, + Cardinality.EXACTLY_ONE, "The constructed URI")), + new FunctionSignature( + new QName("build-uri", Function.BUILTIN_FUNCTION_NS), + "Constructs a URI from the parts provided.", + new SequenceType[] { + new FunctionParameterSequenceType("parts", Type.MAP_ITEM, + Cardinality.EXACTLY_ONE, "Map of URI components"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, + Cardinality.ZERO_OR_ONE, "Options map") + }, + new FunctionReturnSequenceType(Type.STRING, + Cardinality.EXACTLY_ONE, "The constructed URI")) + }; + + public FnBuildUri(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final MapType parts = (MapType) args[0].itemAt(0); + + // Parse options + boolean allowDeprecated = false; + boolean omitDefaultPorts = false; + boolean uncPath = false; + if (args.length > 1 && !args[1].isEmpty()) { + final MapType options = (MapType) args[1].itemAt(0); + allowDeprecated = getBooleanOption(options, "allow-deprecated-features", false); + omitDefaultPorts = getBooleanOption(options, "omit-default-ports", false); + uncPath = getBooleanOption(options, "unc-path", false); + } + + final StringBuilder uri = new StringBuilder(); + + // Get scheme + final String scheme = getStringValue(parts, "scheme"); + + // Determine if hierarchical + boolean hierarchical = true; + final Sequence hierSeq = parts.get(new StringValue(this, "hierarchical")); + if (hierSeq != null && !hierSeq.isEmpty()) { + hierarchical = hierSeq.effectiveBooleanValue(); + } else if (scheme != null) { + hierarchical = !NON_HIERARCHICAL_SCHEMES.contains(scheme.toLowerCase()); + } + + // Add scheme + if (scheme != null) { + uri.append(scheme); + if (!hierarchical) { + uri.append(':'); + } else if ("file".equalsIgnoreCase(scheme) && uncPath) { + uri.append(":////"); + } else { + uri.append("://"); + } + } + + // Build authority from components or use authority directly + final String userinfo = getStringValue(parts, "userinfo"); + final String host = getStringValue(parts, "host"); + final Sequence portSeq = parts.get(new StringValue(this, "port")); + Integer port = null; + if (portSeq != null && !portSeq.isEmpty()) { + port = ((Number) portSeq.itemAt(0).toJavaObject(Long.class)).intValue(); + } + + // Handle deprecated password in userinfo + String effectiveUserinfo = userinfo; + if (!allowDeprecated && effectiveUserinfo != null && effectiveUserinfo.contains(":")) { + final String password = effectiveUserinfo.substring(effectiveUserinfo.indexOf(':') + 1); + if (!password.isEmpty()) { + effectiveUserinfo = null; + } + } + + // Omit default ports + if (omitDefaultPorts && port != null && scheme != null) { + if (isDefaultPort(scheme.toLowerCase(), port)) { + port = null; + } + } + + if (effectiveUserinfo != null || host != null || port != null) { + if (scheme == null) { + uri.append("//"); + } + if (effectiveUserinfo != null) { + uri.append(effectiveUserinfo).append('@'); + } + if (host != null) { + uri.append(host); + } + if (port != null) { + uri.append(':').append(port); + } + } else { + final String authority = getStringValue(parts, "authority"); + if (authority != null) { + if (scheme == null) { + uri.append("//"); + } + uri.append(authority); + } + } + + // Build path from path-segments or use path directly + final Sequence pathSegments = parts.get(new StringValue(this, "path-segments")); + if (pathSegments != null && !pathSegments.isEmpty()) { + final StringBuilder pathBuilder = new StringBuilder(); + boolean first = true; + for (final SequenceIterator i = pathSegments.iterate(); i.hasNext(); ) { + if (!first) { + pathBuilder.append('/'); + } + first = false; + final String segment = i.nextItem().getStringValue(); + if (hierarchical) { + pathBuilder.append(encodePathSegment(segment)); + } else { + pathBuilder.append(segment); + } + } + uri.append(pathBuilder); + } else { + final String path = getStringValue(parts, "path"); + if (path != null) { + uri.append(path); + } + } + + // Build query from query-parameters or use query directly + final Sequence queryParamsSeq = parts.get(new StringValue(this, "query-parameters")); + if (queryParamsSeq != null && !queryParamsSeq.isEmpty() && queryParamsSeq.itemAt(0) instanceof MapType) { + final MapType queryParams = (MapType) queryParamsSeq.itemAt(0); + final StringBuilder queryBuilder = new StringBuilder(); + boolean first = true; + for (final SequenceIterator ki = queryParams.keys().iterate(); ki.hasNext(); ) { + final StringValue key = (StringValue) ki.nextItem(); + final Sequence values = queryParams.get(key); + for (final SequenceIterator vi = values.iterate(); vi.hasNext(); ) { + if (!first) { + queryBuilder.append('&'); + } + first = false; + final String keyStr = key.getStringValue(); + final String valStr = vi.nextItem().getStringValue(); + if (keyStr.isEmpty()) { + queryBuilder.append(encodeQueryComponent(valStr)); + } else { + queryBuilder.append(encodeQueryComponent(keyStr)) + .append('=') + .append(encodeQueryComponent(valStr)); + } + } + } + if (queryBuilder.length() > 0) { + uri.append('?').append(queryBuilder); + } + } else { + final String query = getStringValue(parts, "query"); + if (query != null) { + uri.append('?').append(query); + } + } + + // Fragment + final String fragment = getStringValue(parts, "fragment"); + if (fragment != null) { + uri.append('#').append(encodeFragment(fragment)); + } + + return new StringValue(this, uri.toString()); + } + + private String getStringValue(final MapType map, final String key) throws XPathException { + final Sequence val = map.get(new StringValue(this, key)); + if (val != null && !val.isEmpty()) { + return val.getStringValue(); + } + return null; + } + + private boolean getBooleanOption(final MapType options, final String key, + final boolean defaultValue) throws XPathException { + final Sequence val = options.get(new StringValue(this, key)); + if (val != null && !val.isEmpty()) { + return val.effectiveBooleanValue(); + } + return defaultValue; + } + + private static boolean isDefaultPort(final String scheme, final int port) { + switch (scheme) { + case "http": return port == 80; + case "https": return port == 443; + case "ftp": return port == 21; + case "ssh": return port == 22; + default: return false; + } + } + + // Encode path segment: control chars + space % / ? # + [ ] + private static String encodePathSegment(final String s) { + if (s == null || s.isEmpty()) { + return s; + } + final StringBuilder sb = new StringBuilder(s.length()); + for (int i = 0; i < s.length(); i++) { + final char c = s.charAt(i); + if (c < 0x20 || c == ' ' || c == '%' || c == '/' || c == '?' + || c == '#' || c == '+' || c == '[' || c == ']') { + appendPercentEncoded(sb, c); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + // Encode query component: control chars + space % = & # + [ ] + private static String encodeQueryComponent(final String s) { + if (s == null || s.isEmpty()) { + return s; + } + final StringBuilder sb = new StringBuilder(s.length()); + for (int i = 0; i < s.length(); i++) { + final char c = s.charAt(i); + if (c < 0x20 || c == ' ' || c == '%' || c == '=' || c == '&' + || c == '#' || c == '+' || c == '[' || c == ']') { + appendPercentEncoded(sb, c); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + // Encode fragment: control chars + space % # + [ ] + private static String encodeFragment(final String s) { + if (s == null || s.isEmpty()) { + return s; + } + final StringBuilder sb = new StringBuilder(s.length()); + for (int i = 0; i < s.length(); i++) { + final char c = s.charAt(i); + if (c < 0x20 || c == ' ' || c == '%' || c == '#' || c == '+' || c == '[' || c == ']') { + appendPercentEncoded(sb, c); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + private static void appendPercentEncoded(final StringBuilder sb, final char c) { + if (c < 0x80) { + sb.append('%').append(String.format("%02X", (int) c)); + } else { + try { + final byte[] bytes = String.valueOf(c).getBytes("UTF-8"); + for (final byte b : bytes) { + sb.append('%').append(String.format("%02X", b & 0xFF)); + } + } catch (final UnsupportedEncodingException e) { + sb.append(c); + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnChar.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnChar.java new file mode 100644 index 00000000000..fb97fd1a0dc --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnChar.java @@ -0,0 +1,218 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +/** + * Implements fn:char (XQuery 4.0). + * + * Returns a string containing a single character identified by its codepoint + * or by an HTML5 character reference name. + */ +public class FnChar extends BasicFunction { + + private static final ErrorCodes.ErrorCode FOCH0005 = new ErrorCodes.ErrorCode( + "FOCH0005", "Unknown character name"); + + public static final FunctionSignature FN_CHAR = new FunctionSignature( + new QName("char", Function.BUILTIN_FUNCTION_NS), + "Returns a string containing a single character identified by codepoint or character name.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, + "A codepoint (integer) or character name (string)") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the character")); + + private static volatile Map htmlEntities; + + public FnChar(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final var item = args[0].itemAt(0); + final int type = item.getType(); + + if (Type.subTypeOf(type, Type.INTEGER)) { + // Codepoint + final long codepoint = ((IntegerValue) item).getLong(); + if (codepoint < 1 || codepoint > 0x10FFFF) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Codepoint " + codepoint + " is not in the valid range 1 to 1114111"); + } + // Check for XML-illegal characters (surrogates, etc.) + if (!isXmlChar((int) codepoint)) { + throw new XPathException(this, FOCH0005, + "Codepoint " + codepoint + " is not a valid XML character"); + } + return new StringValue(this, new String(Character.toChars((int) codepoint))); + } else if (Type.subTypeOf(type, Type.DOUBLE) || Type.subTypeOf(type, Type.FLOAT) + || Type.subTypeOf(type, Type.DECIMAL)) { + // Numeric but not integer — try to convert + final NumericValue num = (NumericValue) item; + if (num.hasFractionalPart()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Codepoint must be an integer, got " + Type.getTypeName(type)); + } + final long codepoint = num.getLong(); + if (codepoint < 1 || codepoint > 0x10FFFF) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Codepoint " + codepoint + " is not in the valid range 1 to 1114111"); + } + if (!isXmlChar((int) codepoint)) { + throw new XPathException(this, FOCH0005, + "Codepoint " + codepoint + " is not a valid XML character"); + } + return new StringValue(this, new String(Character.toChars((int) codepoint))); + } else { + // Character name lookup + final String name = item.getStringValue(); + + // Handle backslash escapes + switch (name) { + case "\\n": return new StringValue(this, "\n"); + case "\\r": return new StringValue(this, "\r"); + case "\\t": return new StringValue(this, "\t"); + } + + // Try HTML5 named character reference first (case-sensitive per spec) + final Map entities = getHtmlEntities(); + String resolved = entities.get(name); + if (resolved != null) { + return new StringValue(this, resolved); + } + + // Try Unicode character name + try { + final int cp = Character.codePointOf(name.replace(" ", "_").replace("-", "_").toUpperCase()); + if (isXmlChar(cp)) { + return new StringValue(this, new String(Character.toChars(cp))); + } + } catch (final IllegalArgumentException e) { + // Not a Unicode name either + } + + throw new XPathException(this, FOCH0005, + "Unknown character name: " + name); + } + } + + private static boolean isXmlChar(final int cp) { + return cp == 0x9 || cp == 0xA || cp == 0xD + || (cp >= 0x20 && cp <= 0xD7FF) + || (cp >= 0xE000 && cp <= 0xFFFD) + || (cp >= 0x10000 && cp <= 0x10FFFF); + } + + private static Map getHtmlEntities() { + if (htmlEntities == null) { + synchronized (FnChar.class) { + if (htmlEntities == null) { + htmlEntities = loadHtmlEntities(); + } + } + } + return htmlEntities; + } + + private static Map loadHtmlEntities() { + final Map map = new HashMap<>(2500); + + // Load from bundled resource file + final InputStream is = FnChar.class.getResourceAsStream("html5-entities.properties"); + if (is != null) { + try (final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (line.isEmpty() || line.startsWith("#")) { + continue; + } + final int eq = line.indexOf('='); + if (eq > 0) { + final String entityName = line.substring(0, eq); + final String codepoints = line.substring(eq + 1); + map.put(entityName, decodeCodepoints(codepoints)); + } + } + } catch (final IOException e) { + // Fall through with partial map + } + } + + // Add a few critical aliases if the file wasn't found + if (map.isEmpty()) { + addCommonEntities(map); + } + + return map; + } + + private static String decodeCodepoints(final String spec) { + // Format: "U+XXXX" or "U+XXXX,U+YYYY" + final StringBuilder sb = new StringBuilder(); + for (final String part : spec.split(",")) { + final String trimmed = part.trim(); + if (trimmed.startsWith("U+") || trimmed.startsWith("u+")) { + final int cp = Integer.parseInt(trimmed.substring(2), 16); + sb.appendCodePoint(cp); + } + } + return sb.toString(); + } + + private static void addCommonEntities(final Map map) { + map.put("amp", "&"); + map.put("lt", "<"); + map.put("gt", ">"); + map.put("quot", "\""); + map.put("apos", "'"); + map.put("nbsp", "\u00A0"); + map.put("tab", "\t"); + map.put("newline", "\n"); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCharacters.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCharacters.java new file mode 100644 index 00000000000..a45f63ca623 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCharacters.java @@ -0,0 +1,77 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:characters (XQuery 4.0). + * + * Splits the supplied string into a sequence of single-character strings. + */ +public class FnCharacters extends BasicFunction { + + public static final FunctionSignature FN_CHARACTERS = new FunctionSignature( + new QName("characters", Function.BUILTIN_FUNCTION_NS), + "Splits the supplied string into a sequence of single-character strings.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to split") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "a sequence of single-character strings")); + + public FnCharacters(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String str = args[0].getStringValue(); + if (str.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final ValueSequence result = new ValueSequence(str.length()); + // Use codepoint iteration to handle surrogate pairs correctly + int i = 0; + while (i < str.length()) { + final int codepoint = str.codePointAt(i); + result.add(new StringValue(this, new String(Character.toChars(codepoint)))); + i += Character.charCount(codepoint); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCivilTimezone.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCivilTimezone.java new file mode 100644 index 00000000000..bace87f5755 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCivilTimezone.java @@ -0,0 +1,152 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AbstractDateTimeValue; +import org.exist.xquery.value.DayTimeDurationValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import javax.xml.datatype.XMLGregorianCalendar; +import javax.xml.datatype.DatatypeConstants; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.zone.ZoneRulesException; + +/** + * Implements XQuery 4.0 fn:civil-timezone. + * + * fn:civil-timezone($value as xs:dateTime, $place as xs:string?) as xs:dayTimeDuration + * + * Returns the civil timezone offset for a given dateTime at a given IANA timezone location, + * accounting for daylight savings time transitions. + */ +public class FnCivilTimezone extends BasicFunction { + + private static final ErrorCodes.ErrorCode FODT0004 = new ErrorCodes.ErrorCode("FODT0004", + "No timezone data available"); + + public static final FunctionSignature[] FN_CIVIL_TIMEZONE = { + new FunctionSignature( + new QName("civil-timezone", Function.BUILTIN_FUNCTION_NS), + "Returns the civil timezone offset for a dateTime at a place.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DATE_TIME, Cardinality.EXACTLY_ONE, "The dateTime to look up"), + new FunctionParameterSequenceType("place", Type.STRING, Cardinality.ZERO_OR_ONE, "IANA timezone name (e.g. 'America/New_York')") + }, + new FunctionReturnSequenceType(Type.DAY_TIME_DURATION, Cardinality.EXACTLY_ONE, "the civil timezone offset")), + new FunctionSignature( + new QName("civil-timezone", Function.BUILTIN_FUNCTION_NS), + "Returns the civil timezone offset for a dateTime using the default place.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DATE_TIME, Cardinality.EXACTLY_ONE, "The dateTime to look up") + }, + new FunctionReturnSequenceType(Type.DAY_TIME_DURATION, Cardinality.EXACTLY_ONE, "the civil timezone offset")) + }; + + public FnCivilTimezone(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final AbstractDateTimeValue dtv = (AbstractDateTimeValue) args[0].itemAt(0); + final XMLGregorianCalendar cal = (XMLGregorianCalendar) dtv.calendar.clone(); + + // Determine the IANA zone + final ZoneId zone; + if (args.length > 1 && !args[1].isEmpty()) { + final String place = args[1].getStringValue(); + try { + zone = ZoneId.of(place); + } catch (final java.time.DateTimeException e) { + throw new XPathException(this, FODT0004, + "Unknown timezone: " + place); + } + } else { + // Use system default timezone as the "default place" + zone = ZoneId.systemDefault(); + } + + // Convert the dateTime to a LocalDateTime (ignoring any timezone on the value) + final int year = cal.getYear(); + final int month = cal.getMonth(); + final int day = cal.getDay(); + final int hour = cal.getHour() == DatatypeConstants.FIELD_UNDEFINED ? 0 : cal.getHour(); + final int minute = cal.getMinute() == DatatypeConstants.FIELD_UNDEFINED ? 0 : cal.getMinute(); + final int second = cal.getSecond() == DatatypeConstants.FIELD_UNDEFINED ? 0 : cal.getSecond(); + + final LocalDateTime ldt = LocalDateTime.of(year, month, day, hour, minute, second); + + // Get the offset at that local date-time in the given zone + final ZonedDateTime zdt = ldt.atZone(zone); + final ZoneOffset offset = zdt.getOffset(); + final int totalSeconds = offset.getTotalSeconds(); + + // Convert to xs:dayTimeDuration + final String dur = secondsToDayTimeDuration(totalSeconds); + return new DayTimeDurationValue(this, dur); + } + + private static String secondsToDayTimeDuration(final int totalSeconds) { + final boolean negative = totalSeconds < 0; + int abs = Math.abs(totalSeconds); + final int hours = abs / 3600; + abs %= 3600; + final int minutes = abs / 60; + final int seconds = abs % 60; + + final StringBuilder sb = new StringBuilder(); + if (negative) { + sb.append('-'); + } + sb.append("PT"); + if (hours > 0) { + sb.append(hours).append('H'); + } + if (minutes > 0) { + sb.append(minutes).append('M'); + } + if (seconds > 0) { + sb.append(seconds).append('S'); + } + // If all zero, output PT0S + if (hours == 0 && minutes == 0 && seconds == 0) { + sb.append("0S"); + } + return sb.toString(); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCollation.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCollation.java new file mode 100644 index 00000000000..15df9a08251 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCollation.java @@ -0,0 +1,72 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +/** + * fn:collation() — Returns the default collation URI. + * fn:collation-available($uri) — Returns true if the collation is supported. + */ +public class FnCollation extends BasicFunction { + + public static final FunctionSignature FN_COLLATION = new FunctionSignature( + new QName("collation", Function.BUILTIN_FUNCTION_NS), + "Returns the URI of the default collation.", + null, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, + "The default collation URI")); + + public static final FunctionSignature FN_COLLATION_AVAILABLE = new FunctionSignature( + new QName("collation-available", Function.BUILTIN_FUNCTION_NS), + "Returns true if the specified collation is supported.", + new SequenceType[] { + new FunctionParameterSequenceType("uri", Type.STRING, + Cardinality.EXACTLY_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, + "true if the collation is supported")); + + public FnCollation(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("collation")) { + final String defaultCollation = context.getDefaultCollation(); + return new StringValue(this, defaultCollation != null ? defaultCollation + : org.exist.util.Collations.UNICODE_CODEPOINT_COLLATION_URI); + } else { + // collation-available + final String uri = args[0].getStringValue(); + try { + context.getCollator(uri); + return BooleanValue.TRUE; + } catch (final XPathException e) { + return BooleanValue.FALSE; + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDateTimeParts.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDateTimeParts.java new file mode 100644 index 00000000000..b9445ce1fd0 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDateTimeParts.java @@ -0,0 +1,176 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; + +import javax.xml.datatype.DatatypeConstants; +import java.math.BigDecimal; + +/** + * fn:build-dateTime($date, $time) — Combine xs:date + xs:time into xs:dateTime. + * fn:parts-of-dateTime($dateTime) — Decompose xs:dateTime into a map of components. + * + * The map returned by parts-of-dateTime has keys: year, month, day, hour, minute, + * seconds (as xs:decimal including fractional), timezone (as xs:dayTimeDuration). + * When the Parser branch merges, these maps will be compatible with record type checking. + */ +public class FnDateTimeParts extends BasicFunction { + + public static final FunctionSignature FN_BUILD_DATETIME = new FunctionSignature( + new QName("build-dateTime", Function.BUILTIN_FUNCTION_NS), + "Combines an xs:date and an xs:time into an xs:dateTime.", + new SequenceType[] { + new FunctionParameterSequenceType("date", Type.DATE, + Cardinality.EXACTLY_ONE, "The date component"), + new FunctionParameterSequenceType("time", Type.TIME, + Cardinality.EXACTLY_ONE, "The time component") + }, + new FunctionReturnSequenceType(Type.DATE_TIME, Cardinality.EXACTLY_ONE, + "The combined xs:dateTime")); + + public static final FunctionSignature FN_PARTS_OF_DATETIME = new FunctionSignature( + new QName("parts-of-dateTime", Function.BUILTIN_FUNCTION_NS), + "Decomposes an xs:dateTime into a map of its components.", + new SequenceType[] { + new FunctionParameterSequenceType("dateTime", Type.DATE_TIME, + Cardinality.ZERO_OR_ONE, "The dateTime to decompose") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, + "A map with keys: year, month, day, hour, minute, seconds, timezone")); + + public FnDateTimeParts(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("build-dateTime")) { + return buildDateTime(args); + } else { + return partsOfDateTime(args); + } + } + + private Sequence buildDateTime(final Sequence[] args) throws XPathException { + final DateValue date = (DateValue) args[0].itemAt(0); + final TimeValue time = (TimeValue) args[1].itemAt(0); + + final int year = date.getPart(AbstractDateTimeValue.YEAR); + final int month = date.getPart(AbstractDateTimeValue.MONTH); + final int day = date.getPart(AbstractDateTimeValue.DAY); + final int hour = time.getPart(AbstractDateTimeValue.HOUR); + final int minute = time.getPart(AbstractDateTimeValue.MINUTE); + final int second = time.getPart(AbstractDateTimeValue.SECOND); + final int millis = time.getPart(AbstractDateTimeValue.MILLISECOND); + + // Timezone: both must agree or one must be absent + final Sequence dateTz = date.getTimezone(); + final Sequence timeTz = time.getTimezone(); + + String tzSuffix = ""; + if (!dateTz.isEmpty() && !timeTz.isEmpty()) { + // Both have timezones — they must be equal + final String dateTzStr = dateTz.getStringValue(); + final String timeTzStr = timeTz.getStringValue(); + if (!dateTzStr.equals(timeTzStr)) { + throw new XPathException(this, ErrorCodes.FORG0008, + "Date and time timezone offsets do not match"); + } + tzSuffix = formatTimezoneOffset(date); + } else if (!dateTz.isEmpty()) { + tzSuffix = formatTimezoneOffset(date); + } else if (!timeTz.isEmpty()) { + tzSuffix = formatTimezoneOffset(time); + } + + // Build the lexical representation + final String fracSeconds = millis > 0 ? "." + String.format("%03d", millis) : ""; + final String lexical = String.format("%04d-%02d-%02dT%02d:%02d:%02d%s%s", + year, month, day, hour, minute, second, fracSeconds, tzSuffix); + + return new DateTimeValue(this, lexical); + } + + private String formatTimezoneOffset(final AbstractDateTimeValue dt) throws XPathException { + final Sequence tz = dt.getTimezone(); + if (tz.isEmpty()) { + return ""; + } + final DayTimeDurationValue dtv = (DayTimeDurationValue) tz; + final int totalMinutes = (int) (dtv.getValueInMilliseconds() / 60000L); + if (totalMinutes == 0) { + return "Z"; + } + final int hours = totalMinutes / 60; + final int mins = Math.abs(totalMinutes % 60); + return String.format("%+03d:%02d", hours, mins); + } + + private Sequence partsOfDateTime(final Sequence[] args) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final DateTimeValue dt = (DateTimeValue) args[0].itemAt(0); + final MapType result = new MapType(this, context); + + // year as xs:integer + result.add(new StringValue("year"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.YEAR))); + + // month as xs:integer + result.add(new StringValue("month"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.MONTH))); + + // day as xs:integer + result.add(new StringValue("day"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.DAY))); + + // hour as xs:integer + result.add(new StringValue("hour"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.HOUR))); + + // minute as xs:integer + result.add(new StringValue("minute"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.MINUTE))); + + // seconds as xs:decimal (including fractional part) + final int sec = dt.getPart(AbstractDateTimeValue.SECOND); + final int millis = dt.getPart(AbstractDateTimeValue.MILLISECOND); + final BigDecimal seconds = BigDecimal.valueOf(sec) + .add(BigDecimal.valueOf(millis, 3)); + result.add(new StringValue("seconds"), + new DecimalValue(this, seconds)); + + // timezone as xs:dayTimeDuration (or absent) + final Sequence tz = dt.getTimezone(); + if (!tz.isEmpty()) { + result.add(new StringValue("timezone"), tz); + } + + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDecodeFromUri.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDecodeFromUri.java new file mode 100644 index 00000000000..b94f7bc58c1 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDecodeFromUri.java @@ -0,0 +1,183 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; + +/** + * Implements XQuery 4.0 fn:decode-from-uri. + * + * Decodes a URI-encoded string. Replaces '+' with space. + * Invalid/incomplete percent sequences are replaced with U+FFFD. + * Resulting octets are decoded as UTF-8; invalid UTF-8 is replaced with U+FFFD. + * XML-invalid codepoints are replaced with U+FFFD. + */ +public class FnDecodeFromUri extends BasicFunction { + + private static final char REPLACEMENT = '\uFFFD'; + + public static final FunctionSignature FN_DECODE_FROM_URI = new FunctionSignature( + new QName("decode-from-uri", Function.BUILTIN_FUNCTION_NS), + "Decodes a URI-encoded string.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI-encoded string to decode") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the decoded string")); + + public FnDecodeFromUri(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return new StringValue(this, ""); + } + + final String input = args[0].getStringValue(); + + // Phase 1: decode percent-encoding and '+' to bytes, collecting raw bytes + final ByteArrayOutputStream bytes = new ByteArrayOutputStream(input.length()); + final StringBuilder result = new StringBuilder(input.length()); + + int i = 0; + while (i < input.length()) { + final char c = input.charAt(i); + if (c == '+') { + // Flush any accumulated bytes first + flushBytes(bytes, result); + result.append(' '); + i++; + } else if (c == '%') { + // Try to read percent-encoded byte + if (i + 2 < input.length() && isAscii(input.charAt(i + 1))) { + // Two chars follow and first is ASCII — treat as percent triplet + final int hi = hexDigit(input.charAt(i + 1)); + final int lo = hexDigit(input.charAt(i + 2)); + if (hi >= 0 && lo >= 0) { + bytes.write((hi << 4) | lo); + i += 3; + } else { + // Invalid hex pair: consume all 3 chars, produce one replacement + flushBytes(bytes, result); + result.append(REPLACEMENT); + i += 3; + } + } else if (i + 1 < input.length()) { + // First char after % is non-ASCII, or only 1 char follows + // Consume % + next char, produce replacement + flushBytes(bytes, result); + result.append(REPLACEMENT); + i += 2; + } else { + // % at end of string + flushBytes(bytes, result); + result.append(REPLACEMENT); + i++; + } + } else { + flushBytes(bytes, result); + result.append(c); + i++; + } + } + flushBytes(bytes, result); + + // Phase 2: replace XML-invalid codepoints (handle surrogate pairs for supplementary chars) + final StringBuilder cleaned = new StringBuilder(result.length()); + for (int j = 0; j < result.length(); j++) { + final char ch = result.charAt(j); + if (Character.isHighSurrogate(ch) && j + 1 < result.length() + && Character.isLowSurrogate(result.charAt(j + 1))) { + // Valid surrogate pair = supplementary character (valid in XML 1.0 4th+ edition) + cleaned.append(ch); + cleaned.append(result.charAt(++j)); + } else if (isXmlValid(ch)) { + cleaned.append(ch); + } else { + cleaned.append(REPLACEMENT); + } + } + + return new StringValue(this, cleaned.toString()); + } + + /** + * Flush accumulated bytes as UTF-8, replacing invalid sequences with U+FFFD. + */ + private void flushBytes(final ByteArrayOutputStream bytes, final StringBuilder result) { + if (bytes.size() == 0) { + return; + } + final byte[] data = bytes.toByteArray(); + bytes.reset(); + + final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .replaceWith("\uFFFD"); + + final ByteBuffer bb = ByteBuffer.wrap(data); + final CharBuffer cb = CharBuffer.allocate(data.length * 2); + decoder.decode(bb, cb, true); + decoder.flush(cb); + cb.flip(); + result.append(cb); + } + + private static int hexDigit(final char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; + } + + private static boolean isAscii(final char c) { + return c <= 0x7F; + } + + private static boolean isXmlValid(final char c) { + return c == 0x9 || c == 0xA || c == 0xD || + (c >= 0x20 && c <= 0xD7FF) || + (c >= 0xE000 && c <= 0xFFFD); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDeepEqualOptions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDeepEqualOptions.java new file mode 100644 index 00000000000..05973da16c1 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDeepEqualOptions.java @@ -0,0 +1,84 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +/** + * Implements XQuery 4.0 fn:deep-equal with options parameter (string or map). + * + * Accepts either a collation URI string (XQ3.1 compatible) or an options + * map (XQ4.0) as the 3rd parameter. When an options map is provided, + * validates all option keys/values and uses the options-aware comparison + * engine in {@link DeepEqualOptions}. + */ +public class FnDeepEqualOptions extends BasicFunction { + + public static final FunctionSignature FN_DEEP_EQUAL_OPTIONS = new FunctionSignature( + new QName("deep-equal", Function.BUILTIN_FUNCTION_NS), + "Returns true() iff every item in $items-1 is deep-equal to the item " + + "at the same position in $items-2, using the specified options or collation. " + + "If both $items-1 and $items-2 are the empty sequence, returns true().", + new SequenceType[]{ + new FunctionParameterSequenceType("items-1", Type.ITEM, + Cardinality.ZERO_OR_MORE, "The first item sequence"), + new FunctionParameterSequenceType("items-2", Type.ITEM, + Cardinality.ZERO_OR_MORE, "The second item sequence"), + new FunctionParameterSequenceType("options", Type.ITEM, + Cardinality.ZERO_OR_ONE, "Collation URI string or options map") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, + "true() if the sequences are deep-equal, false() otherwise")); + + public FnDeepEqualOptions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence items1 = args[0]; + final Sequence items2 = args[1]; + + // Parse 3rd parameter: either string (collation) or map (options) + if (args.length > 2 && !args[2].isEmpty()) { + final Item optionsItem = args[2].itemAt(0); + if (optionsItem instanceof AbstractMapType) { + // XQ4: options map — parse, validate, and use options-aware comparison + final DeepEqualOptions options = DeepEqualOptions.parse( + (AbstractMapType) optionsItem, context); + return BooleanValue.valueOf(options.deepEqualsSeq(items1, items2)); + } else { + // XQ3.1 compat: string collation URI + final Collator collator = context.getCollator(optionsItem.getStringValue()); + return BooleanValue.valueOf(FunDeepEqual.deepEqualsSeq(items1, items2, collator)); + } + } + + // No 3rd parameter — use default comparison + final Collator collator = context.getDefaultCollator(); + return BooleanValue.valueOf(FunDeepEqual.deepEqualsSeq(items1, items2, collator)); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDistinctOrderedNodes.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDistinctOrderedNodes.java new file mode 100644 index 00000000000..e8f6f151094 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDistinctOrderedNodes.java @@ -0,0 +1,71 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements XQuery 4.0 fn:distinct-ordered-nodes. + * + * Returns nodes in document order with duplicates removed, equivalent to + * the "/" operator's node deduplication behavior. + */ +public class FnDistinctOrderedNodes extends BasicFunction { + + public static final FunctionSignature FN_DISTINCT_ORDERED_NODES = new FunctionSignature( + new QName("distinct-ordered-nodes", Function.BUILTIN_FUNCTION_NS), + "Returns nodes in document order with duplicates removed.", + new SequenceType[] { + new FunctionParameterSequenceType("nodes", Type.NODE, Cardinality.ZERO_OR_MORE, "The nodes to deduplicate and order") + }, + new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the deduplicated nodes in document order")); + + public FnDistinctOrderedNodes(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence nodes = args[0]; + if (nodes.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // ValueSequence with noDups=true handles both document ordering and deduplication + final ValueSequence result = new ValueSequence(true); + result.addAll(nodes); + result.removeDuplicates(); + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDivideDecimals.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDivideDecimals.java new file mode 100644 index 00000000000..0ebd7c732f0 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDivideDecimals.java @@ -0,0 +1,119 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.DecimalValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import org.exist.xquery.functions.map.MapType; + +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.Item; + +import java.math.BigDecimal; +import java.math.RoundingMode; + +/** + * Implements XQuery 4.0 fn:divide-decimals. + * + * fn:divide-decimals($value, $divisor, $precision?) returns a record with + * quotient and remainder fields. + */ +public class FnDivideDecimals extends BasicFunction { + + public static final FunctionSignature[] FN_DIVIDE_DECIMALS = { + new FunctionSignature( + new QName("divide-decimals", Function.BUILTIN_FUNCTION_NS), + "Divides one decimal by another to specified precision, returning quotient and remainder.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The dividend"), + new FunctionParameterSequenceType("divisor", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The divisor"), + new FunctionParameterSequenceType("precision", Type.INTEGER, Cardinality.ZERO_OR_ONE, "Decimal precision (default: 0)") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "record with quotient and remainder")), + new FunctionSignature( + new QName("divide-decimals", Function.BUILTIN_FUNCTION_NS), + "Divides one decimal by another returning integer quotient and remainder.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The dividend"), + new FunctionParameterSequenceType("divisor", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The divisor") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "record with quotient and remainder")) + }; + + public FnDivideDecimals(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final BigDecimal value = toBigDecimal(args[0].itemAt(0)); + final BigDecimal divisor = toBigDecimal(args[1].itemAt(0)); + + if (divisor.compareTo(BigDecimal.ZERO) == 0) { + throw new XPathException(this, ErrorCodes.FOAR0001, "Division by zero"); + } + + int precision = 0; + if (args.length > 2 && !args[2].isEmpty()) { + precision = (int) ((IntegerValue) args[2].itemAt(0)).getLong(); + } + + // Quotient: truncate toward zero to given precision + final BigDecimal quotient = value.divide(divisor, precision, RoundingMode.DOWN); + final BigDecimal remainder = value.subtract(quotient.multiply(divisor)); + + // Build result record (map) + final MapType result = new MapType(this, context); + result.add(new StringValue(this, "quotient"), new DecimalValue(this, quotient)); + result.add(new StringValue(this, "remainder"), new DecimalValue(this, remainder)); + + return result; + } + + private BigDecimal toBigDecimal(final Item item) throws XPathException { + final AtomicValue av = item.atomize(); + if (av instanceof DecimalValue) { + return ((DecimalValue) av).getValue(); + } + // xs:integer is a subtype of xs:decimal — use string to avoid long truncation + if (av instanceof IntegerValue) { + return new BigDecimal(av.getStringValue()); + } + // Fallback: convert to decimal + return ((DecimalValue) av.convertTo(Type.DECIMAL)).getValue(); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDuplicateValues.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDuplicateValues.java new file mode 100644 index 00000000000..356b53b6826 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDuplicateValues.java @@ -0,0 +1,126 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:duplicate-values (XQuery 4.0). + * + * Returns the values that appear more than once in the input sequence. + */ +public class FnDuplicateValues extends BasicFunction { + + public static final FunctionSignature[] FN_DUPLICATE_VALUES = { + new FunctionSignature( + new QName("duplicate-values", Function.BUILTIN_FUNCTION_NS), + "Returns those values that appear more than once in the input sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input values") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "the duplicate values")), + new FunctionSignature( + new QName("duplicate-values", Function.BUILTIN_FUNCTION_NS), + "Returns those values that appear more than once in the input sequence, using the specified collation.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input values"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "the duplicate values")) + }; + + public FnDuplicateValues(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence values = args[0]; + if (values.getItemCount() <= 1) { + return Sequence.EMPTY_SEQUENCE; + } + + final Collator collator = getCollator(args); + + // Use contextual equality (fn:compare = 0) per XQ4 spec + final java.util.List seen = new java.util.ArrayList<>(); + final java.util.List reported = new java.util.ArrayList<>(); + final ValueSequence result = new ValueSequence(); + + for (final SequenceIterator i = values.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final AtomicValue value = item.atomize(); + + boolean isDuplicate = false; + for (final AtomicValue prev : seen) { + if (FnAllEqualDifferent.contextuallyEqual(prev, value, collator)) { + isDuplicate = true; + break; + } + } + + if (isDuplicate) { + // Check if we already reported this value + boolean alreadyReported = false; + for (final AtomicValue rep : reported) { + if (FnAllEqualDifferent.contextuallyEqual(rep, value, collator)) { + alreadyReported = true; + break; + } + } + if (!alreadyReported) { + result.add(value); + reported.add(value); + } + } else { + seen.add(value); + } + } + return result; + } + + private Collator getCollator(final Sequence[] args) throws XPathException { + if (args.length > 1 && !args[1].isEmpty()) { + final String collationURI = args[1].getStringValue(); + return context.getCollator(collationURI, ErrorCodes.FOCH0002); + } + return context.getDefaultCollator(); + } + +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMap.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMap.java new file mode 100644 index 00000000000..5635586ee7a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMap.java @@ -0,0 +1,458 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; +import org.w3c.dom.*; + +import javax.xml.XMLConstants; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Implements XQuery 4.0 fn:element-to-map. + * + * Converts an element node to a map representation following the XQ4 spec rules + * for different content models (empty, simple, record, list, sequence, mixed). + */ +public class FnElementToMap extends BasicFunction { + + public static final FunctionSignature[] FN_ELEMENT_TO_MAP = { + new FunctionSignature( + new QName("element-to-map", Function.BUILTIN_FUNCTION_NS), + "Converts an element to a map representation.", + new SequenceType[]{ + new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.ZERO_OR_ONE, "The element to convert") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "The map representation")), + new FunctionSignature( + new QName("element-to-map", Function.BUILTIN_FUNCTION_NS), + "Converts an element to a map representation with options.", + new SequenceType[]{ + new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.ZERO_OR_ONE, "The element to convert"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "Options map") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "The map representation")) + }; + + private static final String DEFAULT_ATTR_MARKER = "@"; + private static final String DEFAULT_CONTENT_KEY = "#content"; + private static final String DEFAULT_COMMENT_KEY = "#comment"; + private static final String DEFAULT_NAME_FORMAT = "eqname"; + + public FnElementToMap(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final Node node = ((NodeValue) args[0].itemAt(0)).getNode(); + if (node.getNodeType() != Node.ELEMENT_NODE) { + throw new XPathException(this, ErrorCodes.XPTY0004, "Expected element node"); + } + + // Parse options + String nameFormat = DEFAULT_NAME_FORMAT; + String attrMarker = DEFAULT_ATTR_MARKER; + String contentKey = DEFAULT_CONTENT_KEY; + String commentKey = DEFAULT_COMMENT_KEY; + + if (args.length > 1 && !args[1].isEmpty()) { + final MapType options = (MapType) args[1].itemAt(0); + final Sequence nfSeq = options.get(new StringValue(this, "name-format")); + if (nfSeq != null && !nfSeq.isEmpty()) { + nameFormat = nfSeq.getStringValue(); + } + final Sequence amSeq = options.get(new StringValue(this, "attribute-marker")); + if (amSeq != null && !amSeq.isEmpty()) { + attrMarker = amSeq.getStringValue(); + } + final Sequence ckSeq = options.get(new StringValue(this, "content-key")); + if (ckSeq != null && !ckSeq.isEmpty()) { + contentKey = ckSeq.getStringValue(); + } + final Sequence cmSeq = options.get(new StringValue(this, "comment-key")); + if (cmSeq != null && !cmSeq.isEmpty()) { + commentKey = cmSeq.getStringValue(); + } + } + + final Options opts = new Options(nameFormat, attrMarker, contentKey, commentKey); + return convertElement((Element) node, opts); + } + + private MapType convertElement(final Element elem, final Options opts) throws XPathException { + final String elemName = formatName(elem, opts); + final Sequence value = convertContent(elem, opts); + + MapType result = new MapType(this, context); + result = (MapType) result.put(new StringValue(this, elemName), value); + return result; + } + + private Sequence convertContent(final Element elem, final Options opts) throws XPathException { + // Collect attributes (excluding xmlns and xsi:type) + final Map attrs = new LinkedHashMap<>(); + final NamedNodeMap attrNodes = elem.getAttributes(); + if (attrNodes != null) { + for (int i = 0; i < attrNodes.getLength(); i++) { + final Attr attr = (Attr) attrNodes.item(i); + final String attrName = attr.getName(); + // Skip namespace declarations and xsi:type + if (attrName.startsWith("xmlns") && (attrName.length() == 5 || attrName.charAt(5) == ':')) { + continue; + } + if ("xsi:type".equals(attrName)) { + continue; + } + if (attrName.equals("xsi:nil")) { + continue; + } + final String key = opts.attrMarker + formatAttrName(attr, opts); + attrs.put(key, attr.getValue()); + } + } + + // Check for xsi:nil + final String nilAttr = elem.getAttributeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "nil"); + if ("true".equals(nilAttr) || "1".equals(nilAttr)) { + if (attrs.isEmpty()) { + // Return fn:null() as QName + return new QNameValue(this, context, new QName("null", Function.BUILTIN_FUNCTION_NS, "fn")); + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put( + new StringValue(this, opts.contentKey), + new QNameValue(this, context, new QName("null", Function.BUILTIN_FUNCTION_NS, "fn"))); + return attrMap; + } + } + + // Collect child nodes (elements, text, comments, PIs) + final List children = new ArrayList<>(); + final NodeList childNodes = elem.getChildNodes(); + for (int i = 0; i < childNodes.getLength(); i++) { + final Node child = childNodes.item(i); + switch (child.getNodeType()) { + case Node.ELEMENT_NODE: + case Node.TEXT_NODE: + case Node.CDATA_SECTION_NODE: + case Node.COMMENT_NODE: + children.add(child); + break; + default: + break; + } + } + + // Classify content model + final boolean hasElements = children.stream().anyMatch(n -> n.getNodeType() == Node.ELEMENT_NODE); + final boolean hasTextContent = children.stream().anyMatch(n -> + (n.getNodeType() == Node.TEXT_NODE || n.getNodeType() == Node.CDATA_SECTION_NODE) + && !n.getTextContent().trim().isEmpty()); + final boolean hasComments = children.stream().anyMatch(n -> n.getNodeType() == Node.COMMENT_NODE); + + // Empty element + if (children.isEmpty() || (!hasElements && !hasTextContent && !hasComments)) { + if (attrs.isEmpty()) { + return new StringValue(this, ""); + } else { + // Empty-plus: attributes only, no #content key + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + return attrMap; + } + } + + // Simple text content (no child elements) + if (!hasElements && !hasComments) { + final String textContent = getTextContent(children); + if (attrs.isEmpty()) { + return new StringValue(this, textContent); + } else { + return buildAttrMap(attrs, new StringValue(this, textContent), opts); + } + } + + // Mixed content (has both text and element children) + if (hasTextContent && hasElements) { + return buildMixedContent(children, attrs, opts); + } + + // Element-only content — determine layout + final List childElements = new ArrayList<>(); + for (final Node child : children) { + if (child.getNodeType() == Node.ELEMENT_NODE) { + childElements.add((Element) child); + } + } + + // Check for comments interleaved with elements + if (hasComments && !hasElements) { + return buildMixedContent(children, attrs, opts); + } + + // Check if all children have the same name (list pattern) + final boolean allSameName = childElements.size() > 1 && + childElements.stream().allMatch(e -> + formatName(e, opts).equals(formatName(childElements.get(0), opts))); + + // Check if all children have unique names (record pattern) + final Map> groupedByName = new LinkedHashMap<>(); + for (final Element child : childElements) { + groupedByName.computeIfAbsent(formatName(child, opts), k -> new ArrayList<>()).add(child); + } + final boolean allUnique = groupedByName.values().stream().allMatch(l -> l.size() == 1); + + if (allSameName) { + // List layout: array of child values + return buildListContent(childElements, attrs, opts); + } else if (allUnique) { + // Record layout: map of child name → value + return buildRecordContent(childElements, attrs, children, opts); + } else { + // Sequence layout: array of child maps + return buildSequenceContent(children, attrs, opts); + } + } + + private Sequence buildAttrMap(final Map attrs, final Sequence contentValue, final Options opts) throws XPathException { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), contentValue); + return attrMap; + } + + private Sequence buildListContent(final List children, final Map attrs, final Options opts) throws XPathException { + // Array of child content values + final List items = new ArrayList<>(); + for (final Element child : children) { + items.add(convertContent(child, opts)); + } + final ArrayType array = new ArrayType(this, context, items); + + if (attrs.isEmpty()) { + return array; + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), array); + return attrMap; + } + } + + private Sequence buildRecordContent(final List childElements, final Map attrs, + final List allChildren, final Options opts) throws XPathException { + MapType recordMap = new MapType(this, context); + + // Add attributes first + for (final Map.Entry a : attrs.entrySet()) { + recordMap = (MapType) recordMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + + // Add comments if present + for (final Node child : allChildren) { + if (child.getNodeType() == Node.COMMENT_NODE) { + recordMap = (MapType) recordMap.put( + new StringValue(this, opts.commentKey), + new StringValue(this, child.getTextContent())); + } + } + + // Add child elements + for (final Element child : childElements) { + final String childName = formatName(child, opts); + final Sequence childValue = convertContent(child, opts); + recordMap = (MapType) recordMap.put(new StringValue(this, childName), childValue); + } + + return recordMap; + } + + private Sequence buildSequenceContent(final List children, final Map attrs, final Options opts) throws XPathException { + // Build array of child maps/values + final List items = new ArrayList<>(); + for (final Node child : children) { + if (child.getNodeType() == Node.ELEMENT_NODE) { + items.add(convertElement((Element) child, opts)); + } else if (child.getNodeType() == Node.TEXT_NODE || child.getNodeType() == Node.CDATA_SECTION_NODE) { + final String text = child.getTextContent(); + if (!text.trim().isEmpty()) { + items.add(new StringValue(this, text)); + } + } else if (child.getNodeType() == Node.COMMENT_NODE) { + MapType commentMap = new MapType(this, context); + commentMap = (MapType) commentMap.put( + new StringValue(this, opts.commentKey), + new StringValue(this, child.getTextContent())); + items.add(commentMap); + } + } + final ArrayType array = new ArrayType(this, context, items); + + if (attrs.isEmpty()) { + return array; + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), array); + return attrMap; + } + } + + private Sequence buildMixedContent(final List children, final Map attrs, final Options opts) throws XPathException { + final List items = new ArrayList<>(); + for (final Node child : children) { + switch (child.getNodeType()) { + case Node.ELEMENT_NODE: + items.add(convertElement((Element) child, opts)); + break; + case Node.TEXT_NODE: + case Node.CDATA_SECTION_NODE: + final String text = child.getTextContent(); + if (!text.isEmpty()) { + items.add(new StringValue(this, text)); + } + break; + case Node.COMMENT_NODE: + MapType commentMap = new MapType(this, context); + commentMap = (MapType) commentMap.put( + new StringValue(this, opts.commentKey), + new StringValue(this, child.getTextContent())); + items.add(commentMap); + break; + default: + break; + } + } + final ArrayType array = new ArrayType(this, context, items); + + if (attrs.isEmpty()) { + return array; + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), array); + return attrMap; + } + } + + private String formatName(final Element elem, final Options opts) { + final String ns = elem.getNamespaceURI(); + final String local = elem.getLocalName() != null ? elem.getLocalName() : elem.getTagName(); + + switch (opts.nameFormat) { + case "eqname": + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + case "lexical": + final String prefix = elem.getPrefix(); + if (prefix != null && !prefix.isEmpty()) { + return prefix + ":" + local; + } + return local; + case "local": + return local; + default: + // Default to eqname + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + } + } + + private String formatAttrName(final Attr attr, final Options opts) { + final String ns = attr.getNamespaceURI(); + final String local = attr.getLocalName() != null ? attr.getLocalName() : attr.getName(); + + switch (opts.nameFormat) { + case "eqname": + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + case "lexical": + final String prefix = attr.getPrefix(); + if (prefix != null && !prefix.isEmpty()) { + return prefix + ":" + local; + } + return local; + case "local": + return local; + default: + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + } + } + + private static String getTextContent(final List children) { + final StringBuilder sb = new StringBuilder(); + for (final Node child : children) { + if (child.getNodeType() == Node.TEXT_NODE || child.getNodeType() == Node.CDATA_SECTION_NODE) { + sb.append(child.getTextContent()); + } + } + return sb.toString(); + } + + private static class Options { + final String nameFormat; + final String attrMarker; + final String contentKey; + final String commentKey; + + Options(final String nameFormat, final String attrMarker, final String contentKey, final String commentKey) { + this.nameFormat = nameFormat; + this.attrMarker = attrMarker; + this.contentKey = contentKey; + this.commentKey = commentKey; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnEverySome.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnEverySome.java new file mode 100644 index 00000000000..ee18e143012 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnEverySome.java @@ -0,0 +1,177 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements XQuery 4.0 fn:every and fn:some. + */ +public class FnEverySome extends BasicFunction { + + public static final FunctionSignature[] FN_EVERY = { + new FunctionSignature( + new QName("every", Function.BUILTIN_FUNCTION_NS), + "Returns true if every item in the input sequence matches the supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The predicate function (defaults to fn:boolean#1)") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all items match")), + new FunctionSignature( + new QName("every", Function.BUILTIN_FUNCTION_NS), + "Returns true if every item in the input sequence has an effective boolean value of true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all items are truthy")) + }; + + public static final FunctionSignature[] FN_SOME = { + new FunctionSignature( + new QName("some", Function.BUILTIN_FUNCTION_NS), + "Returns true if at least one item in the input sequence matches the supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The predicate function (defaults to fn:boolean#1)") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if any item matches")), + new FunctionSignature( + new QName("some", Function.BUILTIN_FUNCTION_NS), + "Returns true if at least one item in the input sequence has an effective boolean value of true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if any item is truthy")) + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnEverySome(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final boolean isEvery = isCalledAs("every"); + + // 1-arg overload: use effective boolean value + if (args.length == 1) { + return evalWithEBV(input, isEvery); + } + + // 2-arg overload: use predicate function (empty predicate = use EBV) + if (args[1].isEmpty()) { + return evalWithEBV(input, isEvery); + } + + if (input.isEmpty()) { + return BooleanValue.valueOf(isEvery); + } + + try (final FunctionReference ref = (FunctionReference) args[1].itemAt(0)) { + ref.analyze(cachedContextInfo); + final int arity = ref.getSignature().getArgumentCount(); + + // Validate arity: predicate must accept 0, 1, or 2 arguments + if (arity > 2) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Predicate function must accept 0, 1, or 2 arguments, but has arity " + arity); + } + + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + final Sequence r = callPredicate(ref, item, pos, arity); + // XQ4: predicate must return xs:boolean (xs:untypedAtomic is coercible) + if (!r.isEmpty()) { + final int rType = r.itemAt(0).getType(); + if (rType != Type.BOOLEAN && rType != Type.UNTYPED_ATOMIC) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Predicate function must return xs:boolean, but returned " + + Type.getTypeName(rType)); + } + } + final boolean matches = !r.isEmpty() && r.effectiveBooleanValue(); + if (isEvery && !matches) { + return BooleanValue.FALSE; + } + if (!isEvery && matches) { + return BooleanValue.TRUE; + } + } + return BooleanValue.valueOf(isEvery); + } + } + + private Sequence evalWithEBV(final Sequence input, final boolean isEvery) throws XPathException { + if (input.isEmpty()) { + return BooleanValue.valueOf(isEvery); + } + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final boolean ebv = item.toSequence().effectiveBooleanValue(); + if (isEvery && !ebv) { + return BooleanValue.FALSE; + } + if (!isEvery && ebv) { + return BooleanValue.TRUE; + } + } + return BooleanValue.valueOf(isEvery); + } + + private Sequence callPredicate(final FunctionReference ref, final Item item, final int pos, final int arity) throws XPathException { + if (arity == 0) { + return ref.evalFunction(null, null, new Sequence[0]); + } else if (arity == 1) { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence()}); + } else { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence(), new IntegerValue(this, pos)}); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnExpandedQName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnExpandedQName.java new file mode 100644 index 00000000000..7dc2190314a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnExpandedQName.java @@ -0,0 +1,74 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.QNameValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +/** + * Implements fn:expanded-QName (XQuery 4.0). + * + * Returns a string in Q{uri}local format for a QName value. + */ +public class FnExpandedQName extends BasicFunction { + + public static final FunctionSignature FN_EXPANDED_QNAME = new FunctionSignature( + new QName("expanded-QName", Function.BUILTIN_FUNCTION_NS), + "Returns the expanded QName in Q{uri}local notation.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.QNAME, Cardinality.ZERO_OR_ONE, + "The QName value") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_ONE, + "the expanded QName string in Q{uri}local format")); + + public FnExpandedQName(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final QNameValue qnameVal = (QNameValue) args[0].itemAt(0); + final QName qname = qnameVal.getQName(); + + final String ns = qname.getNamespaceURI() != null ? qname.getNamespaceURI() : ""; + final String local = qname.getLocalPart(); + + return new StringValue(this, "Q{" + ns + "}" + local); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java index 2ade21d3117..c39b28f29a6 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java @@ -21,12 +21,17 @@ */ package org.exist.xquery.functions.fn; +import com.ibm.icu.text.MessageFormat; +import com.ibm.icu.text.RuleBasedNumberFormat; +import org.apache.commons.lang3.StringUtils; import org.exist.dom.QName; import org.exist.xquery.*; import org.exist.xquery.util.NumberFormatter; import org.exist.xquery.value.*; +import java.util.ArrayList; import java.util.Calendar; +import java.util.List; import java.util.Locale; import java.util.Optional; import java.util.TimeZone; @@ -152,6 +157,7 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce final String picture = args[1].getStringValue(); final String language; final Optional place; + String calendar = null; if (getArgumentCount() == 5) { if (args[2].hasOne()) { language = args[2].getStringValue(); @@ -159,6 +165,10 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce language = context.getDefaultLanguage(); } + if (args[3].hasOne()) { + calendar = args[3].getStringValue(); + } + if(args[4].hasOne()) { place = Optional.of(args[4].getStringValue()); } else { @@ -169,6 +179,32 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce place = Optional.empty(); } + // Validate calendar parameter + if (calendar != null) { + if (calendar.startsWith(":")) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Invalid calendar name: " + calendar); + } + if (calendar.startsWith("Q{}")) { + final String localPart = calendar.substring(3); + if (localPart.isEmpty() || !Character.isLetter(localPart.charAt(0))) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Invalid calendar name: " + calendar); + } + if (!isKnownCalendar(localPart)) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Unknown calendar: " + calendar); + } + } else if (calendar.startsWith("Q{") && calendar.contains("}")) { + // EQName with non-empty namespace: accept with fallback + } else if (calendar.contains(":")) { + // Prefixed QName: accept with fallback + } else if (!isKnownCalendar(calendar)) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Unknown calendar: " + calendar); + } + } + return new StringValue(this, formatDate(picture, value, language, place)); } @@ -214,6 +250,8 @@ private String formatDate(String pic, AbstractDateTimeValue dt, final String lan private void formatComponent(String component, AbstractDateTimeValue dt, final String language, final Optional place, final boolean tzHMZNPictureHint, final StringBuilder sb) throws XPathException { + // Per spec, whitespace within a variable marker is insignificant + component = component.replaceAll("\\s+", ""); final Matcher matcher = componentPattern.matcher(component); if (!matcher.matches()) { throw new XPathException(this, ErrorCodes.FOFD1340, "Unrecognized date/time component: " + component); @@ -349,8 +387,8 @@ private void formatComponent(String component, AbstractDateTimeValue dt, final S break; case 'f': if (allowTime) { - final int fraction = dt.getPart(AbstractDateTimeValue.MILLISECOND); - formatNumber(specifier, picture, width, fraction, language, sb); + final int millis = dt.getPart(AbstractDateTimeValue.MILLISECOND); + formatFractionalSeconds(millis, picture, width, sb); } else { throw new XPathException(this, ErrorCodes.FOFD1350, "format-date does not support a fractional seconds component"); @@ -384,85 +422,255 @@ private void formatComponent(String component, AbstractDateTimeValue dt, final S sb.append(formatTimeZone(picture, dtv.getPart(DurationValue.HOUR), minute, cal.getTimeZone(), language, place)); + } else if ("Z".equals(picture)) { + // Military timezone: J = local time (no timezone specified) + sb.append("J"); } break; + case 'E': + if (allowDate) { + final int year = dt.getPart(AbstractDateTimeValue.YEAR); + sb.append(year >= 0 ? "AD" : "BC"); + } else { + throw new XPathException(this, ErrorCodes.FOFD1350, + "format-time does not support an era component"); + } + break; + case 'C': + sb.append("AD"); + break; default: throw new XPathException(this, ErrorCodes.FOFD1340, "Unrecognized date/time component: " + component); } } - private String formatTimeZone(final String timezonePicture, final int hour, final int minute, + private String formatTimeZone(String timezonePicture, final int hour, final int minute, final TimeZone timeZone, final String language, final Optional place) { - final Locale locale = new Locale(language); + // Military timezone letter + if ("Z".equals(timezonePicture)) { + return formatMilitaryTimeZone(hour, minute); + } - final String format; - switch(timezonePicture) { - case "0": - if(minute != 0) { - format = "%+d:%02d"; + // Named timezone + if ("N".equals(timezonePicture)) { + final Locale locale = new Locale(language); + final TimeZone tz = place.map(TimeZone::getTimeZone).orElse(timeZone); + return tz.getDisplayName(timeZone.useDaylightTime(), TimeZone.SHORT, locale); + } + + // Check for 't' modifier (use "Z" for UTC) + final boolean useZForUTC = timezonePicture.endsWith("t"); + if (useZForUTC) { + timezonePicture = timezonePicture.substring(0, timezonePicture.length() - 1); + } + if (useZForUTC && hour == 0 && minute == 0) { + return "Z"; + } + + // Parse the picture: find digit family, separator, hour/minute digit counts + int zero = '0'; + boolean zeroFound = false; + int hourDigits = 0; + int minuteDigits = 0; + String separator = null; + + for (int i = 0; i < timezonePicture.length(); i++) { + final int ch = timezonePicture.codePointAt(i); + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { + if (!zeroFound) { zero = family; zeroFound = true; } + if (separator == null) { hourDigits++; } else { minuteDigits++; } + } else if (ch == '#') { + if (separator == null) { hourDigits++; } else { minuteDigits++; } + } else if (separator == null && hourDigits > 0) { + separator = new String(Character.toChars(ch)); + } + if (Character.isSupplementaryCodePoint(ch)) { i++; } + } + + final int absHour = Math.abs(hour); + final String sign = (hour < 0) ? "-" : "+"; + final StringBuilder result = new StringBuilder(sign); + + if (separator != null && minuteDigits > 0) { + result.append(padWithDigitFamily(absHour, hourDigits, zero)); + result.append(separator); + result.append(padWithDigitFamily(minute, minuteDigits, zero)); + } else if (hourDigits >= 3) { + result.append(padWithDigitFamily(absHour * 100 + minute, hourDigits, zero)); + } else { + result.append(padWithDigitFamily(absHour, hourDigits, zero)); + if (minute != 0) { + result.append(":"); + result.append(padWithDigitFamily(minute, 2, zero)); + } + } + + return result.toString(); + } + + private static String padWithDigitFamily(int value, int minDigits, int zero) { + String s = Integer.toString(value); + while (s.length() < minDigits) { s = "0" + s; } + if (zero != '0') { + final StringBuilder converted = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + final char ch = s.charAt(i); + if (ch >= '0' && ch <= '9') { + converted.appendCodePoint(zero + (ch - '0')); } else { - format = "%+d"; + converted.append(ch); } - break; + } + return converted.toString(); + } + return s; + } - case "0000": - format = "%+03d%02d"; - break; + // Military timezone: Z(0), A-I(+1 to +9), K-M(+10 to +12), N-Y(-1 to -12) + // J is reserved for local time (no timezone) and is NOT in this array + private final static char[] MILITARY_TZ_CHARS = {'Z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', + 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y' }; - case "0:00": - format = "%+d:%02d"; - break; + private String formatMilitaryTimeZone(final int hour, final int minute) { + if (minute == 0 && hour >= -12 && hour <= 12) { + final int offset = (hour < 0) ? 12 + (hour * -1) : hour; + return String.valueOf(MILITARY_TZ_CHARS[offset]); + } else { + return String.format("%+03d:%02d", hour, minute); + } + } + + /** + * Format fractional seconds as left-aligned digits. + * Unlike regular integer formatting, fractional seconds treat the value + * as a fraction (0.456) where digits are extracted left-to-right. + */ + private void formatFractionalSeconds(int millis, String picture, String width, + StringBuilder sb) throws XPathException { + // Build the fractional digit string, left-aligned, padded to 3 digits + String fracDigits = String.format("%03d", millis); + + // Count actual digit positions in picture (ignoring separators and modifiers) + int picMin = 0; + int picMax = 0; + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { + picMin++; + picMax++; + } else if (ch == '#') { + picMax++; + } + } + + int min = picMin; + // A multi-digit picture constrains max precision; single-digit is unbounded + final boolean pictureSetsMax = (picMax > 1); + int max = pictureSetsMax ? picMax : Integer.MAX_VALUE; - case "00:00t": - if(hour == 0 && minute == 0) { - format = "Z"; + // Width specifier + final int[] widths = getWidths(width); + if (widths != null) { + if (widths[0] > 0) { min = Math.max(picMin, widths[0]); } + if (widths[1] > 0) { + if (pictureSetsMax) { + max = Math.max(picMax, widths[1]); } else { - format = "%+03d:%02d"; + max = widths[1]; } - break; + } + } + if (max < min) { max = min; } - case "N": - final TimeZone tz = place.map(TimeZone::getTimeZone).orElse(timeZone); - return tz.getDisplayName(timeZone.useDaylightTime(), TimeZone.SHORT, locale); + // Pad to min with trailing zeros + while (fracDigits.length() < min) { + fracDigits += "0"; + } - case "Z": - return formatMilitaryTimeZone(hour, minute); + // Truncate to max precision + if (fracDigits.length() > max) { + fracDigits = fracDigits.substring(0, max); + } - case "00:00": - default: - format = "%+03d:%02d"; + // Remove trailing zeros beyond min (variable-width output) + while (fracDigits.length() > min && fracDigits.endsWith("0")) { + fracDigits = fracDigits.substring(0, fracDigits.length() - 1); + } + + // Apply digit family from picture (e.g., Arabic-Indic digits) + final int digitSign = getFirstDigitInPicture(picture); + if (digitSign >= 0) { + final int zero = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(digitSign); + if (zero != '0') { + final StringBuilder converted = new StringBuilder(); + for (int i = 0; i < fracDigits.length(); i++) { + final char ch = fracDigits.charAt(i); + if (ch >= '0' && ch <= '9') { + converted.append((char)(zero + (ch - '0'))); + } else { + converted.append(ch); + } + } + fracDigits = converted.toString(); + } + } + + // Insert grouping separators from picture if present + if (hasGroupingSeparators(picture)) { + fracDigits = applyGroupingSeparators(fracDigits, picture); } - return String.format(locale, format, hour, minute); + sb.append(fracDigits); } - private final static char[] MILITARY_TZ_CHARS = {'Z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', - 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y' }; + private static int getFirstDigitInPicture(String picture) { + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if (ch != '#' && ch != 'o' && ch != 'c') { + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { + return ch; + } + } + } + return -1; + } - /** - * Military time zone - * - * Z = +00:00, A = +01:00, B = +02:00, ..., M = +12:00, N = -01:00, O = -02:00, ... Y = -12:00. - * - * The letter J (meaning local time) is used in the case of a value that does not specify a timezone - * offset. - * - * Timezone offsets that have no representation in this system (for example Indian Standard Time, +05:30) - * are output as if the format 01:01 had been requested. - */ - private String formatMilitaryTimeZone(final int hour, final int minute) { - if(minute == 0 && hour > -12 && hour < 12) { - final int offset; - if(hour < 0) { - offset = 13 + (hour * -1); + private static boolean hasGroupingSeparators(String picture) { + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family < 0 && ch != '#') { + return true; + } + } + return false; + } + + private static String applyGroupingSeparators(String digits, String picture) { + final StringBuilder result = new StringBuilder(); + int digitIdx = 0; + for (int i = 0; i < picture.length() && digitIdx < digits.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0 || ch == '#') { + result.append(digits.charAt(digitIdx)); + digitIdx++; } else { - offset = hour; + result.append(ch); } - return String.valueOf(MILITARY_TZ_CHARS[offset]); - } else { - return String.format("%+03d:%02d", hour, minute); } + while (digitIdx < digits.length()) { + result.append(digits.charAt(digitIdx)); + digitIdx++; + } + return result.toString(); } private String getDefaultFormat(char specifier) { @@ -512,6 +720,80 @@ private void formatNumber(char specifier, String picture, String width, int num, return; } + // Word formatting: W (uppercase), w (lowercase), Ww (title case) + // With optional ordinal modifier: Wo, wo, Wwo + final String basePicture = picture.endsWith("o") ? picture.substring(0, picture.length() - 1) : picture; + final boolean ordinalWords = picture.endsWith("o") && (basePicture.equals("W") || basePicture.equals("w") || basePicture.equals("Ww")); + if ("W".equals(basePicture) || "w".equals(basePicture) || "Ww".equals(basePicture)) { + final Locale locale = new Locale(language); + final String spelloutRule = ordinalWords ? "%spellout-ordinal" : "%spellout-cardinal"; + + // Check if the rule exists, fall back to cardinal if ordinal not available + final RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(locale, RuleBasedNumberFormat.SPELLOUT); + String ruleToUse = spelloutRule; + boolean ruleFound = false; + for (final String ruleName : rbnf.getRuleSetNames()) { + if (ruleName.equals(ruleToUse)) { + ruleFound = true; + break; + } + } + if (!ruleFound) { + ruleToUse = "%spellout-cardinal"; + } + + final MessageFormat fmt = new MessageFormat("{0,spellout," + ruleToUse + "}", locale); + String word = fmt.format(new Object[]{num}); + + if ("W".equals(basePicture)) { + word = word.toUpperCase(locale); + } else if ("Ww".equals(basePicture)) { + // Title case: capitalize each word + final String[] parts = word.split("((?<=[ -])|(?=[ -]))"); + final StringBuilder titled = new StringBuilder(); + for (final String part : parts) { + titled.append(StringUtils.capitalize(part)); + } + word = titled.toString(); + } + // "w" is already lowercase from ICU4J + + sb.append(word); + return; + } + + // Roman numeral formatting: I (uppercase), i (lowercase) + if ("I".equals(picture) || "i".equals(picture)) { + String roman = toRoman(Math.abs(num)); + if ("i".equals(picture)) { + roman = roman.toLowerCase(); + } + sb.append(roman); + return; + } + + // Handle grouping separators in numeric pictures (e.g., [Y9;999], [Y9,999,*]) + if (hasGroupingSeparators(picture)) { + sb.append(formatWithGroupingSeparators(num, picture)); + return; + } + + // Validate optional digit placement: # must precede mandatory digits, not follow + boolean seenMandatory = false; + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + if (ch == '#') { + if (seenMandatory) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Optional digit '#' must not appear after mandatory digits in: " + picture); + } + } else { + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { seenMandatory = true; } + } + } + // determine min and max width int min = NumberFormatter.getMinDigits(picture); int max = NumberFormatter.getMaxDigits(picture); @@ -531,6 +813,83 @@ private void formatNumber(char specifier, String picture, String width, int num, } } + private static final int[] ROMAN_VALUES = {1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1}; + private static final String[] ROMAN_SYMBOLS = {"M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"}; + + private static String toRoman(int num) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < ROMAN_VALUES.length; i++) { + while (num >= ROMAN_VALUES[i]) { + sb.append(ROMAN_SYMBOLS[i]); + num -= ROMAN_VALUES[i]; + } + } + return sb.toString(); + } + + private static String formatWithGroupingSeparators(int num, String picture) { + String pic = picture; + if (pic.endsWith("o") || pic.endsWith("c")) { pic = pic.substring(0, pic.length() - 1); } + if (pic.endsWith(",*")) { pic = pic.substring(0, pic.length() - 2); } + + int zero = '0'; + for (int i = 0; i < pic.length(); i++) { + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(pic.charAt(i)); + if (family >= 0) { zero = family; break; } + } + + // Map separator positions (counted from the right) + final List sepPositions = new ArrayList<>(); + final List sepChars = new ArrayList<>(); + int digitCount = 0; + for (int i = pic.length() - 1; i >= 0; i--) { + final char ch = pic.charAt(i); + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0 || ch == '#') { + digitCount++; + } else { + sepPositions.add(digitCount); + sepChars.add(ch); + } + } + + final String digits = Integer.toString(num); + final StringBuilder result = new StringBuilder(); + int digitIdx = digits.length() - 1; + int pos = 0; + while (digitIdx >= 0) { + for (int s = 0; s < sepPositions.size(); s++) { + if (sepPositions.get(s) == pos && pos > 0) { + result.insert(0, sepChars.get(s)); + } + } + result.insert(0, digits.charAt(digitIdx)); + digitIdx--; + pos++; + } + + if (zero != '0') { + final StringBuilder converted = new StringBuilder(); + for (int i = 0; i < result.length(); i++) { + final char ch = result.charAt(i); + if (ch >= '0' && ch <= '9') { + converted.append((char)(zero + (ch - '0'))); + } else { + converted.append(ch); + } + } + return converted.toString(); + } + return result.toString(); + } + + private static boolean isKnownCalendar(final String calendar) { + return switch (calendar.toUpperCase()) { + case "AD", "ISO", "OS", "NS" -> true; + default -> false; + }; + } + private int[] getWidths(String width) throws XPathException { if (width == null || width.isEmpty()) {return null;} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatIntegers.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatIntegers.java index 6b474e08d69..e698ec4fcaa 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatIntegers.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatIntegers.java @@ -62,7 +62,7 @@ public class FnFormatIntegers extends BasicFunction { arity( FS_PARAM_VALUE, FS_PARAM_PICTURE, - optParam("lang", Type.STRING, "The language in which to format the integers.") + optParam("language", Type.STRING, "The language in which to format the integers.") ) ) ); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGraphemes.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGraphemes.java new file mode 100644 index 00000000000..45701961288 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGraphemes.java @@ -0,0 +1,86 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.BreakIterator; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:graphemes (XQuery 4.0). + * + * Splits the supplied string into a sequence of strings, each containing + * one Unicode extended grapheme cluster. + * + * Uses ICU4J's BreakIterator for Unicode grapheme cluster boundary detection, + * which handles combining marks, emoji sequences, regional indicators, etc. + */ +public class FnGraphemes extends BasicFunction { + + public static final FunctionSignature FN_GRAPHEMES = new FunctionSignature( + new QName("graphemes", Function.BUILTIN_FUNCTION_NS), + "Splits the supplied string into a sequence of strings, each containing " + + "one Unicode extended grapheme cluster.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, + "The string to split into grapheme clusters") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, + "a sequence of strings, each containing one grapheme cluster")); + + public FnGraphemes(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String str = args[0].getStringValue(); + if (str.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final BreakIterator bi = BreakIterator.getCharacterInstance(); + bi.setText(str); + + final ValueSequence result = new ValueSequence(); + int start = bi.first(); + for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) { + result.add(new StringValue(this, str.substring(start, end))); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHash.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHash.java new file mode 100644 index 00000000000..47a02cb8b9e --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHash.java @@ -0,0 +1,177 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.zip.CRC32; + +import org.bouncycastle.crypto.digests.Blake3Digest; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BinaryValue; +import org.exist.xquery.value.BinaryValueFromBinaryString; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.HexBinaryValueType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:hash (XQuery 4.0). + * + * Returns the result of a hash/checksum function applied to the input. + * Supports MD5, SHA-1, SHA-256, CRC-32. + */ +public class FnHash extends BasicFunction { + + public static final ErrorCodes.ErrorCode FOHA0001 = new ErrorCodes.ErrorCode("FOHA0001", + "Unsupported hash algorithm"); + + public static final FunctionSignature[] FN_HASH = { + new FunctionSignature( + new QName("hash", Function.BUILTIN_FUNCTION_NS), + "Returns the hash of the input value using the default algorithm (MD5).", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The value to hash (string, hexBinary, or base64Binary)") + }, + new FunctionReturnSequenceType(Type.HEX_BINARY, Cardinality.ZERO_OR_ONE, "the hash value")), + new FunctionSignature( + new QName("hash", Function.BUILTIN_FUNCTION_NS), + "Returns the hash of the input value using the specified algorithm.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The value to hash (string, hexBinary, or base64Binary)"), + new FunctionParameterSequenceType("algorithm", Type.STRING, Cardinality.ZERO_OR_ONE, "The hash algorithm (MD5, SHA-1, SHA-256, CRC-32)") + }, + new FunctionReturnSequenceType(Type.HEX_BINARY, Cardinality.ZERO_OR_ONE, "the hash value")), + new FunctionSignature( + new QName("hash", Function.BUILTIN_FUNCTION_NS), + "Returns the hash of the input value using the specified algorithm and options.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The value to hash (string, hexBinary, or base64Binary)"), + new FunctionParameterSequenceType("algorithm", Type.STRING, Cardinality.ZERO_OR_ONE, "The hash algorithm (MD5, SHA-1, SHA-256, CRC-32)"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "Options map (reserved for future use)") + }, + new FunctionReturnSequenceType(Type.HEX_BINARY, Cardinality.ZERO_OR_ONE, "the hash value")) + }; + + public FnHash(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Get the input bytes + final byte[] inputBytes = getInputBytes(args[0]); + + // Get the algorithm + String algorithm = "MD5"; + if (args.length > 1 && !args[1].isEmpty()) { + algorithm = args[1].getStringValue().trim().toUpperCase(); + } + + // Compute hash + final byte[] hashBytes; + if ("CRC-32".equals(algorithm) || "CRC32".equals(algorithm)) { + final CRC32 crc32 = new CRC32(); + crc32.update(inputBytes); + final long crcValue = crc32.getValue(); + // Return as 4-byte big-endian hexBinary + hashBytes = ByteBuffer.allocate(4).putInt((int) crcValue).array(); + } else if ("BLAKE3".equals(algorithm)) { + final Blake3Digest blake3 = new Blake3Digest(32); + blake3.update(inputBytes, 0, inputBytes.length); + hashBytes = new byte[32]; + blake3.doFinal(hashBytes, 0); + } else { + // Map algorithm names to Java MessageDigest names + final String javaAlgorithm; + switch (algorithm) { + case "MD5": + javaAlgorithm = "MD5"; + break; + case "SHA-1": + case "SHA1": + javaAlgorithm = "SHA-1"; + break; + case "SHA-256": + case "SHA256": + javaAlgorithm = "SHA-256"; + break; + case "SHA-384": + case "SHA384": + javaAlgorithm = "SHA-384"; + break; + case "SHA-512": + case "SHA512": + javaAlgorithm = "SHA-512"; + break; + default: + throw new XPathException(this, FOHA0001, + "Unsupported hash algorithm: " + algorithm); + } + try { + final MessageDigest digest = MessageDigest.getInstance(javaAlgorithm); + hashBytes = digest.digest(inputBytes); + } catch (final NoSuchAlgorithmException e) { + throw new XPathException(this, FOHA0001, + "Hash algorithm not available: " + javaAlgorithm); + } + } + + // Return as hexBinary — use BinaryValueFromBinaryString to avoid + // stream registration with the XQuery context (prevents deadlock + // in concurrent test execution environments) + final StringBuilder hex = new StringBuilder(hashBytes.length * 2); + for (final byte b : hashBytes) { + hex.append(String.format("%02X", b & 0xFF)); + } + return new BinaryValueFromBinaryString(this, new HexBinaryValueType(), hex.toString()); + } + + private byte[] getInputBytes(final Sequence value) throws XPathException { + final int type = value.itemAt(0).getType(); + if (Type.subTypeOf(type, Type.STRING) || Type.subTypeOf(type, Type.ANY_URI) || Type.subTypeOf(type, Type.UNTYPED_ATOMIC)) { + return value.getStringValue().getBytes(StandardCharsets.UTF_8); + } else if (Type.subTypeOf(type, Type.BASE64_BINARY) || Type.subTypeOf(type, Type.HEX_BINARY)) { + final BinaryValue binaryValue = (BinaryValue) value.itemAt(0); + return binaryValue.toJavaObject(byte[].class); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "fn:hash expects string, hexBinary, or base64Binary, got: " + Type.getTypeName(type)); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHigherOrderFun40.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHigherOrderFun40.java new file mode 100644 index 00000000000..bbd77a86d8e --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHigherOrderFun40.java @@ -0,0 +1,361 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import org.exist.xquery.functions.array.ArrayType; + +/** + * Implements XQuery 4.0 higher-order functions: + * fn:index-where, fn:take-while, fn:do-until, fn:while-do, fn:sort-with, + * fn:scan-left, fn:scan-right. + */ +public class FnHigherOrderFun40 extends BasicFunction { + + public static final FunctionSignature FN_INDEX_WHERE = new FunctionSignature( + new QName("index-where", Function.BUILTIN_FUNCTION_NS), + "Returns the positions of items that match the supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The predicate function") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, "positions where the predicate is true")); + + public static final FunctionSignature FN_TAKE_WHILE = new FunctionSignature( + new QName("take-while", Function.BUILTIN_FUNCTION_NS), + "Returns items from the input sequence prior to the first one that fails to match a supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The predicate function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the leading items matching the predicate")); + + public static final FunctionSignature FN_WHILE_DO = new FunctionSignature( + new QName("while-do", Function.BUILTIN_FUNCTION_NS), + "Processes a supplied value repeatedly, continuing while a condition is true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial input"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The condition to test"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The action to apply") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the first value that fails the predicate")); + + public static final FunctionSignature FN_DO_UNTIL = new FunctionSignature( + new QName("do-until", Function.BUILTIN_FUNCTION_NS), + "Processes a supplied value repeatedly, continuing until a condition becomes true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial input"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The action to apply"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The condition to test") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the first value that satisfies the predicate")); + + public static final FunctionSignature FN_SORT_WITH = new FunctionSignature( + new QName("sort-with", Function.BUILTIN_FUNCTION_NS), + "Sorts a sequence according to a supplied comparator function.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to sort"), + new FunctionParameterSequenceType("comparators", Type.FUNCTION, Cardinality.ONE_OR_MORE, "The comparator function(s)") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the sorted sequence")); + + public static final FunctionSignature FN_SCAN_LEFT = new FunctionSignature( + new QName("scan-left", Function.BUILTIN_FUNCTION_NS), + "Returns successive partial results of fold-left.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("init", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial value"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "The accumulation function: fn(accumulator, item) as item()*") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, + "sequence of single-member arrays with successive fold results")); + + public static final FunctionSignature FN_SCAN_RIGHT = new FunctionSignature( + new QName("scan-right", Function.BUILTIN_FUNCTION_NS), + "Returns successive partial results of fold-right.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("init", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial value"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "The accumulation function: fn(item, accumulator) as item()*") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, + "sequence of single-member arrays with successive fold results")); + + private AnalyzeContextInfo cachedContextInfo; + + public FnHigherOrderFun40(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("while-do")) { + return whileDo(args); + } else if (isCalledAs("do-until")) { + return doUntil(args); + } else if (isCalledAs("sort-with")) { + return sortWith(args); + } else if (isCalledAs("scan-left")) { + return scanLeft(args); + } else if (isCalledAs("scan-right")) { + return scanRight(args); + } + + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + try (final FunctionReference ref = (FunctionReference) args[1].itemAt(0)) { + ref.analyze(cachedContextInfo); + final int arity = ref.getSignature().getArgumentCount(); + + if (isCalledAs("index-where")) { + return indexWhere(input, ref, arity); + } else { + return takeWhile(input, ref, arity); + } + } + } + + private Sequence indexWhere(final Sequence input, final FunctionReference ref, final int arity) throws XPathException { + final ValueSequence result = new ValueSequence(); + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + final Sequence r = callPredicate(ref, item, pos, arity); + if (!r.isEmpty() && r.effectiveBooleanValue()) { + result.add(new IntegerValue(this, pos)); + } + } + return result; + } + + private Sequence takeWhile(final Sequence input, final FunctionReference ref, final int arity) throws XPathException { + final ValueSequence result = new ValueSequence(); + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + final Sequence r = callPredicate(ref, item, pos, arity); + if (r.isEmpty() || !r.effectiveBooleanValue()) { + break; + } + result.add(item); + } + return result; + } + + private Sequence callPredicate(final FunctionReference ref, final Item item, final int pos, final int arity) throws XPathException { + if (arity == 1) { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence()}); + } else { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence(), new IntegerValue(this, pos)}); + } + } + + private Sequence callWithSeqAndPos(final FunctionReference ref, final Sequence input, final int pos, final int arity) throws XPathException { + if (arity == 1) { + return ref.evalFunction(null, null, new Sequence[]{input}); + } else { + return ref.evalFunction(null, null, new Sequence[]{input, new IntegerValue(this, pos)}); + } + } + + private Sequence whileDo(final Sequence[] args) throws XPathException { + Sequence input = args[0]; + try (final FunctionReference predicate = (FunctionReference) args[1].itemAt(0); + final FunctionReference action = (FunctionReference) args[2].itemAt(0)) { + predicate.analyze(cachedContextInfo); + action.analyze(cachedContextInfo); + final int predArity = predicate.getSignature().getArgumentCount(); + final int actArity = action.getSignature().getArgumentCount(); + int pos = 1; + while (true) { + final Sequence test = callWithSeqAndPos(predicate, input, pos, predArity); + if (test.isEmpty() || !test.effectiveBooleanValue()) { + return input; + } + input = callWithSeqAndPos(action, input, pos, actArity); + pos++; + } + } + } + + private Sequence doUntil(final Sequence[] args) throws XPathException { + Sequence input = args[0]; + try (final FunctionReference action = (FunctionReference) args[1].itemAt(0); + final FunctionReference predicate = (FunctionReference) args[2].itemAt(0)) { + action.analyze(cachedContextInfo); + predicate.analyze(cachedContextInfo); + final int actArity = action.getSignature().getArgumentCount(); + final int predArity = predicate.getSignature().getArgumentCount(); + int pos = 1; + while (true) { + input = callWithSeqAndPos(action, input, pos, actArity); + final Sequence test = callWithSeqAndPos(predicate, input, pos, predArity); + if (!test.isEmpty() && test.effectiveBooleanValue()) { + return input; + } + pos++; + } + } + } + + private Sequence sortWith(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + if (input.getItemCount() <= 1) { + return input; + } + final Sequence comparators = args[1]; + + // Collect all items into a list + final List items = new ArrayList<>(input.getItemCount()); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + items.add(i.nextItem()); + } + + // Get the first comparator (most test cases use a single one) + final FunctionReference[] comparatorRefs = new FunctionReference[comparators.getItemCount()]; + for (int c = 0; c < comparators.getItemCount(); c++) { + comparatorRefs[c] = (FunctionReference) comparators.itemAt(c); + comparatorRefs[c].analyze(cachedContextInfo); + } + + // Sort using the comparator(s) + try { + items.sort((a, b) -> { + try { + for (final FunctionReference comp : comparatorRefs) { + final Sequence result = comp.evalFunction(null, null, + new Sequence[]{a.toSequence(), b.toSequence()}); + final long cmp = ((IntegerValue) result.itemAt(0)).getLong(); + if (cmp != 0) { + return Long.compare(cmp, 0); + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + final ValueSequence result = new ValueSequence(items.size()); + for (final Item item : items) { + result.add(item); + } + return result; + } + + private Sequence scanLeft(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + Sequence accumulator = args[1]; + try (final FunctionReference action = (FunctionReference) args[2].itemAt(0)) { + action.analyze(cachedContextInfo); + + final int count = input.getItemCount(); + final ValueSequence result = new ValueSequence(count + 1); + + // First element: [init] + result.add(new ArrayType(this, context, Collections.singletonList(accumulator))); + + // For each input item, apply action and wrap result + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + accumulator = action.evalFunction(null, null, + new Sequence[]{accumulator, item.toSequence()}); + result.add(new ArrayType(this, context, Collections.singletonList(accumulator))); + } + + return result; + } + } + + private Sequence scanRight(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + final Sequence init = args[1]; + try (final FunctionReference action = (FunctionReference) args[2].itemAt(0)) { + action.analyze(cachedContextInfo); + + // Collect items into a list for reverse iteration + final List items = new ArrayList<>(input.getItemCount()); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + items.add(i.nextItem()); + } + + // Build results from right to left + final List results = new ArrayList<>(items.size() + 1); + Sequence accumulator = init; + results.add(accumulator); + + for (int idx = items.size() - 1; idx >= 0; idx--) { + accumulator = action.evalFunction(null, null, + new Sequence[]{items.get(idx).toSequence(), accumulator}); + results.add(accumulator); + } + + // Reverse so first result is fold-right of entire sequence + Collections.reverse(results); + + final ValueSequence result = new ValueSequence(results.size()); + for (final Sequence s : results) { + result.add(new ArrayType(this, context, Collections.singletonList(s))); + } + return result; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHighestLowest.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHighestLowest.java new file mode 100644 index 00000000000..a2abad4fd94 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHighestLowest.java @@ -0,0 +1,226 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.util.ArrayList; +import java.util.List; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.FloatValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:highest and fn:lowest (XQuery 4.0). + * + * Returns items from the input having the highest/lowest key values. + */ +public class FnHighestLowest extends BasicFunction { + + public static final FunctionSignature[] FN_HIGHEST = { + new FunctionSignature( + new QName("highest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the highest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with highest key")), + new FunctionSignature( + new QName("highest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the highest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with highest key")), + new FunctionSignature( + new QName("highest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the highest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI"), + new FunctionParameterSequenceType("key", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Key function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with highest key")) + }; + + public static final FunctionSignature[] FN_LOWEST = { + new FunctionSignature( + new QName("lowest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the lowest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with lowest key")), + new FunctionSignature( + new QName("lowest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the lowest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with lowest key")), + new FunctionSignature( + new QName("lowest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the lowest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI"), + new FunctionParameterSequenceType("key", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Key function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with lowest key")) + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnHighestLowest(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Resolve collation + final Collator collator; + if (args.length >= 2 && !args[1].isEmpty()) { + collator = context.getCollator(args[1].getStringValue()); + } else { + collator = context.getDefaultCollator(); + } + + // Resolve key function (default is data#1) + FunctionReference keyRef = null; + if (args.length >= 3 && !args[2].isEmpty()) { + keyRef = (FunctionReference) args[2].itemAt(0); + keyRef.analyze(cachedContextInfo); + } + + final boolean findHighest = isCalledAs("highest"); + + // Compute keys for all items + final List items = new ArrayList<>(input.getItemCount()); + final List keys = new ArrayList<>(input.getItemCount()); + + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + items.add(item); + + // Compute key: apply key function or default atomization (fn:data) + final AtomicValue keyVal; + if (keyRef != null) { + final Sequence keyResult = keyRef.evalFunction(null, null, new Sequence[]{item.toSequence()}); + if (keyResult.isEmpty()) { + keyVal = null; + } else { + AtomicValue kv = keyResult.itemAt(0).atomize(); + if (kv.getType() == Type.UNTYPED_ATOMIC) { + kv = kv.convertTo(Type.DOUBLE); + } + keyVal = kv; + } + } else { + // Default key is fn:data() — atomize the item directly + final AtomicValue atomized = item.atomize(); + if (atomized.getType() == Type.UNTYPED_ATOMIC) { + keyVal = atomized.convertTo(Type.DOUBLE); + } else { + keyVal = atomized; + } + } + keys.add(keyVal); + } + + // Find the extreme value + AtomicValue extremeKey = null; + for (final AtomicValue key : keys) { + if (key == null || isNaN(key)) { + continue; + } + if (extremeKey == null) { + extremeKey = key; + } else { + final int cmp = key.compareTo(collator, extremeKey); + if (findHighest ? cmp > 0 : cmp < 0) { + extremeKey = key; + } + } + } + + if (extremeKey == null) { + return Sequence.EMPTY_SEQUENCE; + } + + // Collect all items with the extreme key value + final ValueSequence result = new ValueSequence(); + for (int i = 0; i < items.size(); i++) { + final AtomicValue key = keys.get(i); + if (key != null && !isNaN(key) && key.compareTo(collator, extremeKey) == 0) { + result.add(items.get(i)); + } + } + + if (keyRef != null) { + keyRef.close(); + } + + return result; + } + + private static boolean isNaN(final AtomicValue v) { + if (v instanceof DoubleValue) { + return Double.isNaN(((DoubleValue) v).getDouble()); + } + if (v instanceof FloatValue) { + return Float.isNaN(((FloatValue) v).getValue()); + } + return false; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHtmlDoc.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHtmlDoc.java new file mode 100644 index 00000000000..bade6cf1717 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHtmlDoc.java @@ -0,0 +1,71 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +/** + * fn:html-doc($uri) — Like fn:doc but for HTML. + * Loads HTML from a URI, parses it through fn:parse-html, returns XHTML document. + */ +public class FnHtmlDoc extends BasicFunction { + + public static final FunctionSignature FN_HTML_DOC = new FunctionSignature( + new QName("html-doc", Function.BUILTIN_FUNCTION_NS), + "Loads an HTML resource from a URI and returns the parsed XHTML document.", + new SequenceType[] { + new FunctionParameterSequenceType("uri", Type.STRING, + Cardinality.ZERO_OR_ONE, "The URI of the HTML resource") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, + "The parsed XHTML document")); + + public FnHtmlDoc(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String uri = args[0].getStringValue(); + + // Load text content using unparsed-text logic + final FunUnparsedText unparsedText = new FunUnparsedText(context, + FunUnparsedText.FS_UNPARSED_TEXT[0]); + final Sequence textResult = unparsedText.eval( + new Sequence[]{new StringValue(this, uri)}, contextSequence); + + if (textResult.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Parse through fn:parse-html + final FnParseHtml parseHtml = new FnParseHtml(context, + FnParseHtml.FN_PARSE_HTML[0]); + return parseHtml.eval(new Sequence[]{textResult}, contextSequence); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIdentityVoid.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIdentityVoid.java new file mode 100644 index 00000000000..777b717788d --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIdentityVoid.java @@ -0,0 +1,78 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:identity and fn:void (XQuery 4.0). + */ +public class FnIdentityVoid extends BasicFunction { + + public static final FunctionSignature FN_IDENTITY = new FunctionSignature( + new QName("identity", Function.BUILTIN_FUNCTION_NS), + "Returns its argument value unchanged.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input value") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the input value unchanged")); + + public static final FunctionSignature[] FN_VOID = { + new FunctionSignature( + new QName("void", Function.BUILTIN_FUNCTION_NS), + "Absorbs the argument and returns the empty sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input to discard") + }, + new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "the empty sequence")), + new FunctionSignature( + new QName("void", Function.BUILTIN_FUNCTION_NS), + "Returns the empty sequence.", + new SequenceType[] {}, + new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "the empty sequence")) + }; + + public FnIdentityVoid(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("identity")) { + return args[0]; + } else { + // void: discard input, return empty sequence + return Sequence.EMPTY_SEQUENCE; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInScopeNamespaces.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInScopeNamespaces.java new file mode 100644 index 00000000000..b75d1d508bf --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInScopeNamespaces.java @@ -0,0 +1,152 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.Namespaces; +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.HashSet; + +/** + * Implements XQuery 4.0 fn:in-scope-namespaces. + * + * Returns a map(xs:string, xs:string) where keys are namespace prefixes + * (empty string for the default namespace) and values are namespace URIs. + * + * Uses nearest-ancestor-wins semantics: for each prefix, the declaration on + * the nearest ancestor (or the element itself) takes precedence. + */ +public class FnInScopeNamespaces extends BasicFunction { + + public static final FunctionSignature FN_IN_SCOPE_NAMESPACES = new FunctionSignature( + new QName("in-scope-namespaces", Function.BUILTIN_FUNCTION_NS), + "Returns a map from namespace prefixes to namespace URIs for all in-scope namespaces of the given element.", + new SequenceType[]{ + new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.EXACTLY_ONE, "The element node") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "A map of prefix to URI")); + + public FnInScopeNamespaces(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final NodeValue nodeValue = (NodeValue) args[0].itemAt(0); + + // Collect all in-scope namespaces with nearest-ancestor-wins semantics + final Map nsMap = new LinkedHashMap<>(); + nsMap.put("xml", Namespaces.XML_NS); + + // Start with static context namespaces (lowest priority) + final Map inScopePrefixes = context.getInScopePrefixes(); + if (inScopePrefixes != null) { + nsMap.putAll(inScopePrefixes); + } + + // Walk from element up to root, collecting namespace declarations. + // Track which prefixes we've already seen from closer ancestors + // so that nearer declarations override farther ones. + final Set seen = new HashSet<>(); + final Map elementNs = new LinkedHashMap<>(); + Node node = nodeValue.getNode(); + + if (context.preserveNamespaces()) { + while (node != null && node.getNodeType() == Node.ELEMENT_NODE) { + if (context.inheritNamespaces() || node == nodeValue.getNode()) { + collectElementNamespaces((Element) node, elementNs, seen); + } + node = node.getParentNode(); + } + } + + // Element declarations override static context (merge on top) + nsMap.putAll(elementNs); + + // Clean up: remove entries where both key and value are empty + nsMap.entrySet().removeIf(entry -> + (entry.getKey() == null || entry.getKey().isEmpty()) && + (entry.getValue() == null || entry.getValue().isEmpty())); + + // Build the result map + MapType result = new MapType(this, context); + for (final Map.Entry entry : nsMap.entrySet()) { + result = (MapType) result.put( + new StringValue(this, entry.getKey()), + new StringValue(this, entry.getValue())); + } + + return result; + } + + /** + * Collect namespace declarations from a single element, respecting nearest-wins. + * Only adds prefixes not already in the {@code seen} set. + */ + private static void collectElementNamespaces(final Element element, final Map nsMap, final Set seen) { + // Element's own namespace + final String namespaceURI = element.getNamespaceURI(); + if (namespaceURI != null && !namespaceURI.isEmpty()) { + final String prefix = element.getPrefix(); + final String key = prefix == null ? "" : prefix; + if (seen.add(key)) { + nsMap.put(key, namespaceURI); + } + } + + // Namespace declarations from the element + if (element instanceof org.exist.dom.memtree.ElementImpl) { + final Map elemNs = new LinkedHashMap<>(); + ((org.exist.dom.memtree.ElementImpl) element).getNamespaceMap(elemNs); + for (final Map.Entry entry : elemNs.entrySet()) { + if (seen.add(entry.getKey())) { + nsMap.put(entry.getKey(), entry.getValue()); + } + } + } else if (element instanceof org.exist.dom.persistent.ElementImpl) { + final org.exist.dom.persistent.ElementImpl elemImpl = (org.exist.dom.persistent.ElementImpl) element; + if (elemImpl.declaresNamespacePrefixes()) { + for (final java.util.Iterator i = elemImpl.getPrefixes(); i.hasNext(); ) { + final String prefix = i.next(); + if (seen.add(prefix)) { + nsMap.put(prefix, elemImpl.getNamespaceForPrefix(prefix)); + } + } + } + } + + // Handle undeclaration: if namespace URI is explicitly empty, remove the prefix + if (namespaceURI != null && namespaceURI.isEmpty()) { + final String prefix = element.getPrefix(); + final String key = prefix == null ? "" : prefix; + nsMap.remove(key); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInsertSeparator.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInsertSeparator.java new file mode 100644 index 00000000000..ffe3729a0d3 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInsertSeparator.java @@ -0,0 +1,74 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:insert-separator (XQuery 4.0). + * + * Inserts a separator between adjacent items in a sequence. + */ +public class FnInsertSeparator extends BasicFunction { + + public static final FunctionSignature FN_INSERT_SEPARATOR = new FunctionSignature( + new QName("insert-separator", Function.BUILTIN_FUNCTION_NS), + "Inserts a separator between adjacent items in a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("separator", Type.ITEM, Cardinality.ZERO_OR_MORE, "The separator to insert") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the sequence with separators inserted")); + + public FnInsertSeparator(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final Sequence separator = args[1]; + final int inputSize = input.getItemCount(); + if (inputSize <= 1 || separator.isEmpty()) { + return input; + } + final ValueSequence result = new ValueSequence(inputSize + (inputSize - 1) * separator.getItemCount()); + result.add(input.itemAt(0)); + for (int i = 1; i < inputSize; i++) { + result.addAll(separator); + result.add(input.itemAt(i)); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInvisibleXml.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInvisibleXml.java new file mode 100644 index 00000000000..3599bf77dac --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInvisibleXml.java @@ -0,0 +1,308 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import de.bottlecaps.markup.Blitz; +import de.bottlecaps.markup.BlitzException; +import de.bottlecaps.markup.BlitzParseException; + +import org.exist.Namespaces; +import org.exist.dom.QName; +import org.exist.dom.memtree.DocumentImpl; +import org.exist.dom.memtree.SAXAdapter; +import org.exist.util.XMLReaderPool; +import org.exist.xquery.value.NodeValue; +import org.exist.xquery.AbstractExpression; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionCall; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +import javax.xml.XMLConstants; +import java.io.StringReader; + +/** + * Implements fn:invisible-xml() (XQuery 4.0). + * + * Compiles an Invisible XML grammar and returns a function that parses input + * strings into XML documents. + * + * Uses the Markup Blitz library for ixml grammar compilation and parsing. + * Integration pattern informed by BaseX's implementation. + */ +public class FnInvisibleXml extends BasicFunction { + + // Blitz.generateFromXml() is not thread-safe — synchronize XML grammar compilation + private static final Object BLITZ_XML_LOCK = new Object(); + + private static final FunctionParameterSequenceType PARAM_GRAMMAR = + new FunctionParameterSequenceType("grammar", Type.ITEM, + Cardinality.ZERO_OR_ONE, "The ixml grammar (string or element node)"); + private static final FunctionParameterSequenceType PARAM_OPTIONS = + new FunctionParameterSequenceType("options", Type.MAP_ITEM, + Cardinality.ZERO_OR_ONE, "Options map (fail-on-error: xs:boolean)"); + private static final FunctionReturnSequenceType RETURN_TYPE = + new FunctionReturnSequenceType(Type.FUNCTION, Cardinality.EXACTLY_ONE, + "a function that parses strings according to the grammar"); + + public static final FunctionSignature[] SIGNATURES = { + new FunctionSignature( + new QName("invisible-xml", Function.BUILTIN_FUNCTION_NS), + "Compiles an Invisible XML grammar and returns a parsing function.", + new SequenceType[] { PARAM_GRAMMAR }, + RETURN_TYPE), + new FunctionSignature( + new QName("invisible-xml", Function.BUILTIN_FUNCTION_NS), + "Compiles an Invisible XML grammar and returns a parsing function.", + new SequenceType[] { PARAM_GRAMMAR, PARAM_OPTIONS }, + RETURN_TYPE) + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnInvisibleXml(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence grammarArg = args[0]; + + // Parse options — default fail-on-error is false per spec + boolean failOnError = false; + if (args.length > 1 && !args[1].isEmpty()) { + final AbstractMapType options = (AbstractMapType) args[1].itemAt(0); + final Sequence failOpt = options.get(new StringValue(this, "fail-on-error")); + if (!failOpt.isEmpty()) { + final Item failItem = failOpt.itemAt(0); + if (failItem.getType() != Type.BOOLEAN) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Option 'fail-on-error' must be xs:boolean, got: " + + Type.getTypeName(failItem.getType())); + } + failOnError = ((BooleanValue) failItem).getValue(); + } else if (options.contains(new StringValue(this, "fail-on-error"))) { + // Key exists but value is empty sequence + throw new XPathException(this, ErrorCodes.XPTY0004, + "Option 'fail-on-error' must be xs:boolean, got empty sequence"); + } + // else: key not present, use default (false) + } + + // Compile the grammar + final de.bottlecaps.markup.blitz.Parser parser; + try { + if (grammarArg.isEmpty()) { + // Empty sequence = use default ixml grammar + parser = failOnError + ? Blitz.generate(Blitz.ixmlGrammar(), Blitz.Option.FAIL_ON_ERROR) + : Blitz.generate(Blitz.ixmlGrammar()); + } else { + final Item grammarItem = grammarArg.itemAt(0); + final int grammarType = grammarItem.getType(); + + if (Type.subTypeOf(grammarType, Type.ELEMENT)) { + // Element node — serialize to XML string and use generateFromXml + // Synchronized: Blitz.generateFromXml() is not thread-safe + final String xmlGrammar = serializeItem(grammarItem); + synchronized (BLITZ_XML_LOCK) { + parser = failOnError + ? Blitz.generateFromXml(xmlGrammar, Blitz.Option.FAIL_ON_ERROR) + : Blitz.generateFromXml(xmlGrammar); + } + } else if (Type.subTypeOf(grammarType, Type.STRING) || + grammarType == Type.UNTYPED_ATOMIC) { + // String grammar + final String grammarStr = grammarItem.getStringValue(); + parser = failOnError + ? Blitz.generate(grammarStr, Blitz.Option.FAIL_ON_ERROR) + : Blitz.generate(grammarStr); + } else if (Type.subTypeOf(grammarType, Type.NODE)) { + // Other node types (document, etc.) — not valid + throw new XPathException(this, ErrorCodes.FOIX0001, + "Grammar must be an element node or string, got: " + + Type.getTypeName(grammarType)); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Grammar must be a string or element node, got: " + + Type.getTypeName(grammarType)); + } + } + } catch (final BlitzParseException ex) { + throw new XPathException(this, ErrorCodes.FOIX0001, + "Invalid ixml grammar at line " + ex.getLine() + ", column " + ex.getColumn() + + ": " + ex.getOffendingToken()); + } catch (final BlitzException ex) { + throw new XPathException(this, ErrorCodes.FOIX0001, + "Invalid ixml grammar: " + ex.getMessage()); + } + + // Create a function item that parses input strings using the compiled grammar + final QName inputParam = new QName("input", XMLConstants.NULL_NS_URI); + + final FunctionSignature parseSig = new FunctionSignature( + new QName("invisible-xml-parser", Function.BUILTIN_FUNCTION_NS), + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.STRING, + Cardinality.EXACTLY_ONE, "The string to parse") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.EXACTLY_ONE, + "the parsed XML document")); + + final UserDefinedFunction func = new UserDefinedFunction(context, parseSig); + func.addVariable(inputParam); + func.setFunctionBody(new ParseExpression(context, parser, inputParam, failOnError)); + + final FunctionCall call = new FunctionCall(context, func); + call.setLocation(getLine(), getColumn()); + + return new FunctionReference(this, call); + } + + private String serializeItem(final Item item) throws XPathException { + try { + final org.exist.storage.serializers.Serializer serializer = + context.getBroker().borrowSerializer(); + try { + serializer.setProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes"); + serializer.setProperty(javax.xml.transform.OutputKeys.INDENT, "no"); + return serializer.serialize((NodeValue) item); + } finally { + context.getBroker().returnSerializer(serializer); + } + } catch (final Exception ex) { + throw new XPathException(this, ErrorCodes.FOIX0001, + "Failed to serialize grammar node: " + ex.getMessage()); + } + } + + /** + * Expression that parses an input string using a compiled ixml parser. + */ + private static class ParseExpression extends AbstractExpression { + + private final de.bottlecaps.markup.blitz.Parser parser; + private final QName inputVar; + private final boolean failOnError; + + ParseExpression(final XQueryContext context, final de.bottlecaps.markup.blitz.Parser parser, + final QName inputVar, final boolean failOnError) { + super(context); + this.parser = parser; + this.inputVar = inputVar; + this.failOnError = failOnError; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final String input = context.resolveVariable(inputVar).getValue().getStringValue(); + + // Parse the input using the compiled ixml parser + final String xmlResult; + try { + xmlResult = parser.parse(input); + } catch (final BlitzParseException ex) { + if (failOnError) { + throw new XPathException(this, ErrorCodes.FOIX0002, + "ixml parse error at line " + ex.getLine() + ", column " + ex.getColumn() + + ": " + ex.getOffendingToken()); + } + // Should not happen when FAIL_ON_ERROR is not set, but handle gracefully + throw new XPathException(this, ErrorCodes.FOIX0002, + "ixml parse error: " + ex.getMessage()); + } catch (final BlitzException ex) { + throw new XPathException(this, ErrorCodes.FOIX0002, + "ixml parse error: " + ex.getMessage()); + } + + // Check for ixml:state="failed" on the root element when fail-on-error is true + if (failOnError && xmlResult.contains("ixml:state=\"failed\"")) { + throw new XPathException(this, ErrorCodes.FOIX0002, + "ixml parse failed: input is ambiguous or does not match the grammar"); + } + + // Parse the XML string into an in-memory document + return parseXmlString(xmlResult); + } + + private DocumentImpl parseXmlString(final String xml) throws XPathException { + final XMLReaderPool parserPool = context.getBroker().getBrokerPool().getXmlReaderPool(); + XMLReader xr = null; + try { + xr = parserPool.borrowXMLReader(); + final InputSource src = new InputSource(new StringReader(xml)); + final SAXAdapter adapter = new SAXAdapter(this, context); + xr.setContentHandler(adapter); + xr.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter); + xr.parse(src); + return adapter.getDocument(); + } catch (final Exception ex) { + throw new XPathException(this, ErrorCodes.FOIX0002, + "Failed to parse ixml output as XML: " + ex.getMessage()); + } finally { + if (xr != null) { + parserPool.returnXMLReader(xr); + } + } + } + + @Override + public int returnsType() { + return Type.DOCUMENT; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + // nothing to analyze + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("invisible-xml-parser(...)"); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIsNaN.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIsNaN.java new file mode 100644 index 00000000000..5e3e8b1754b --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIsNaN.java @@ -0,0 +1,71 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.FloatValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:is-NaN (XQuery 4.0). + * + * Returns true if the argument is the xs:float or xs:double value NaN. + */ +public class FnIsNaN extends BasicFunction { + + public static final FunctionSignature FN_IS_NAN = new FunctionSignature( + new QName("is-NaN", Function.BUILTIN_FUNCTION_NS), + "Returns true if the argument is the xs:float or xs:double value NaN.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The value to test") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if the value is NaN")); + + public FnIsNaN(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Item item = args[0].itemAt(0); + final int type = item.getType(); + if (type == Type.DOUBLE) { + return BooleanValue.valueOf(Double.isNaN(((DoubleValue) item).getValue())); + } else if (type == Type.FLOAT) { + return BooleanValue.valueOf(Float.isNaN(((FloatValue) item).getValue())); + } + return BooleanValue.FALSE; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnItemsAt.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnItemsAt.java new file mode 100644 index 00000000000..55c9bf64d74 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnItemsAt.java @@ -0,0 +1,79 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:items-at (XQuery 4.0). + * + * Returns items from the input at the positions specified by the second argument. + */ +public class FnItemsAt extends BasicFunction { + + public static final FunctionSignature FN_ITEMS_AT = new FunctionSignature( + new QName("items-at", Function.BUILTIN_FUNCTION_NS), + "Returns the items at the specified positions in the input sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("at", Type.INTEGER, Cardinality.ZERO_OR_MORE, "The positions to select") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items at the specified positions")); + + public FnItemsAt(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final Sequence at = args[1]; + if (input.isEmpty() || at.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final int inputSize = input.getItemCount(); + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator i = at.iterate(); i.hasNext(); ) { + final Item posItem = i.nextItem(); + final int pos = (int) ((IntegerValue) posItem).getLong(); + if (pos >= 1 && pos <= inputSize) { + result.add(input.itemAt(pos - 1)); + } + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnMessage.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnMessage.java new file mode 100644 index 00000000000..db50f1319b4 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnMessage.java @@ -0,0 +1,85 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements XQuery 4.0 fn:message. + * + * Similar to fn:trace but returns empty-sequence() instead of passing through values. + * Outputs the input values (and optional label) to the log. + */ +public class FnMessage extends BasicFunction { + + private static final Logger LOG = LogManager.getLogger(FnMessage.class); + + public static final FunctionSignature[] FN_MESSAGE = { + new FunctionSignature( + new QName("message", Function.BUILTIN_FUNCTION_NS), + "Outputs values to the log and returns empty sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The values to output") + }, + new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "empty sequence")), + new FunctionSignature( + new QName("message", Function.BUILTIN_FUNCTION_NS), + "Outputs values to the log with a label and returns empty sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The values to output"), + new FunctionParameterSequenceType("label", Type.STRING, Cardinality.ZERO_OR_ONE, "Optional label for the output") + }, + new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "empty sequence")) + }; + + public FnMessage(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final String label = (args.length > 1 && !args[1].isEmpty()) ? args[1].getStringValue() : null; + + final String value = input.getStringValue(); + if (label != null && !label.isEmpty()) { + LOG.info("{}: {}", label, value); + } else { + LOG.info("{}", value); + } + + return Sequence.EMPTY_SEQUENCE; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java index 413c58b5f3d..af783e6fbf0 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java @@ -67,7 +67,7 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunData.signatures[1], FunData.class), new FunctionDef(FunDateTime.signature, FunDateTime.class), new FunctionDef(FunDeepEqual.signatures[0], FunDeepEqual.class), - new FunctionDef(FunDeepEqual.signatures[1], FunDeepEqual.class), + new FunctionDef(FnDeepEqualOptions.FN_DEEP_EQUAL_OPTIONS, FnDeepEqualOptions.class), new FunctionDef(FunDefaultCollation.signature, FunDefaultCollation.class), new FunctionDef(FnDefaultLanguage.FS_DEFAULT_LANGUAGE, FnDefaultLanguage.class), new FunctionDef(FunDistinctValues.signatures[0], FunDistinctValues.class), @@ -152,6 +152,8 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunOnFunctions.signatures[0], FunOnFunctions.class), new FunctionDef(FunOnFunctions.signatures[1], FunOnFunctions.class), new FunctionDef(FunOnFunctions.signatures[2], FunOnFunctions.class), + new FunctionDef(FunOnFunctions.signatures[3], FunOnFunctions.class), + new FunctionDef(FunOnFunctions.signatures[4], FunOnFunctions.class), new FunctionDef(FunMatches.signatures[0], FunMatches.class), new FunctionDef(FunMatches.signatures[1], FunMatches.class), new FunctionDef(FunMax.signatures[0], FunMax.class), @@ -190,6 +192,7 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunRoot.signatures[1], FunRoot.class), new FunctionDef(FunRound.FN_ROUND_SIGNATURES[0], FunRound.class), new FunctionDef(FunRound.FN_ROUND_SIGNATURES[1], FunRound.class), + new FunctionDef(FunRound.FN_ROUND_SIGNATURES[2], FunRound.class), new FunctionDef(FunRoundHalfToEven.FN_ROUND_HALF_TO_EVEN_SIGNATURES[0], FunRoundHalfToEven.class), new FunctionDef(FunRoundHalfToEven.FN_ROUND_HALF_TO_EVEN_SIGNATURES[1], FunRoundHalfToEven.class), new FunctionDef(FunSerialize.signatures[0], FunSerialize.class), @@ -240,8 +243,10 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunEquals.signatures[1], FunEquals.class), new FunctionDef(FunAnalyzeString.signatures[0], FunAnalyzeString.class), new FunctionDef(FunAnalyzeString.signatures[1], FunAnalyzeString.class), - new FunctionDef(FunHeadTail.signatures[0], FunHeadTail.class), - new FunctionDef(FunHeadTail.signatures[1], FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_HEAD, FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_TAIL, FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_FOOT, FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_TRUNK, FunHeadTail.class), new FunctionDef(FunHigherOrderFun.FN_FOR_EACH, FunHigherOrderFun.class), new FunctionDef(FunHigherOrderFun.FN_FOR_EACH_PAIR, FunHigherOrderFun.class), new FunctionDef(FunHigherOrderFun.FN_FILTER, FunHigherOrderFun.class), @@ -272,7 +277,120 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FnRandomNumberGenerator.FS_RANDOM_NUMBER_GENERATOR[0], FnRandomNumberGenerator.class), new FunctionDef(FnRandomNumberGenerator.FS_RANDOM_NUMBER_GENERATOR[1], FnRandomNumberGenerator.class), new FunctionDef(FunContainsToken.FS_CONTAINS_TOKEN[0], FunContainsToken.class), - new FunctionDef(FunContainsToken.FS_CONTAINS_TOKEN[1], FunContainsToken.class) + new FunctionDef(FunContainsToken.FS_CONTAINS_TOKEN[1], FunContainsToken.class), + // XQuery 4.0 functions + new FunctionDef(FnIdentityVoid.FN_IDENTITY, FnIdentityVoid.class), + new FunctionDef(FnIdentityVoid.FN_VOID[0], FnIdentityVoid.class), + new FunctionDef(FnIdentityVoid.FN_VOID[1], FnIdentityVoid.class), + new FunctionDef(FnIsNaN.FN_IS_NAN, FnIsNaN.class), + new FunctionDef(FnCharacters.FN_CHARACTERS, FnCharacters.class), + new FunctionDef(FnGraphemes.FN_GRAPHEMES, FnGraphemes.class), + new FunctionDef(FnParseHtml.FN_PARSE_HTML[0], FnParseHtml.class), + new FunctionDef(FnParseHtml.FN_PARSE_HTML[1], FnParseHtml.class), + new FunctionDef(FnCollation.FN_COLLATION, FnCollation.class), + new FunctionDef(FnCollation.FN_COLLATION_AVAILABLE, FnCollation.class), + new FunctionDef(FnHtmlDoc.FN_HTML_DOC, FnHtmlDoc.class), + new FunctionDef(FnUnparsedBinary.FN_UNPARSED_BINARY, FnUnparsedBinary.class), + new FunctionDef(FnDateTimeParts.FN_BUILD_DATETIME, FnDateTimeParts.class), + new FunctionDef(FnDateTimeParts.FN_PARTS_OF_DATETIME, FnDateTimeParts.class), + new FunctionDef(FnReplicate.FN_REPLICATE, FnReplicate.class), + new FunctionDef(FnInsertSeparator.FN_INSERT_SEPARATOR, FnInsertSeparator.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_EQUAL[0], FnAllEqualDifferent.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_EQUAL[1], FnAllEqualDifferent.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_DIFFERENT[0], FnAllEqualDifferent.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_DIFFERENT[1], FnAllEqualDifferent.class), + new FunctionDef(FnItemsAt.FN_ITEMS_AT, FnItemsAt.class), + new FunctionDef(FnHigherOrderFun40.FN_INDEX_WHERE, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_TAKE_WHILE, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_WHILE_DO, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_DO_UNTIL, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_SORT_WITH, FnHigherOrderFun40.class), + new FunctionDef(FnSlice.FN_SLICE[0], FnSlice.class), + new FunctionDef(FnSlice.FN_SLICE[1], FnSlice.class), + new FunctionDef(FnSlice.FN_SLICE[2], FnSlice.class), + new FunctionDef(FnSlice.FN_SLICE[3], FnSlice.class), + new FunctionDef(FnDuplicateValues.FN_DUPLICATE_VALUES[0], FnDuplicateValues.class), + new FunctionDef(FnDuplicateValues.FN_DUPLICATE_VALUES[1], FnDuplicateValues.class), + new FunctionDef(FnHash.FN_HASH[0], FnHash.class), + new FunctionDef(FnHash.FN_HASH[1], FnHash.class), + new FunctionDef(FnHash.FN_HASH[2], FnHash.class), + new FunctionDef(FnOp.FN_OP, FnOp.class), + new FunctionDef(FnChar.FN_CHAR, FnChar.class), + new FunctionDef(FnAtomicEqual.FN_ATOMIC_EQUAL, FnAtomicEqual.class), + new FunctionDef(FnExpandedQName.FN_EXPANDED_QNAME, FnExpandedQName.class), + new FunctionDef(FnHighestLowest.FN_HIGHEST[0], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_HIGHEST[1], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_HIGHEST[2], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_LOWEST[0], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_LOWEST[1], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_LOWEST[2], FnHighestLowest.class), + new FunctionDef(FnPartition.FN_PARTITION, FnPartition.class), + new FunctionDef(FnParseUri.FN_PARSE_URI[0], FnParseUri.class), + new FunctionDef(FnParseUri.FN_PARSE_URI[1], FnParseUri.class), + new FunctionDef(FnBuildUri.FN_BUILD_URI[0], FnBuildUri.class), + new FunctionDef(FnBuildUri.FN_BUILD_URI[1], FnBuildUri.class), + new FunctionDef(FnHigherOrderFun40.FN_SCAN_LEFT, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_SCAN_RIGHT, FnHigherOrderFun40.class), + // XQuery 4.0 functions — batch 1: HOFs and subsequence matching + new FunctionDef(FnEverySome.FN_EVERY[0], FnEverySome.class), + new FunctionDef(FnEverySome.FN_EVERY[1], FnEverySome.class), + new FunctionDef(FnEverySome.FN_SOME[0], FnEverySome.class), + new FunctionDef(FnEverySome.FN_SOME[1], FnEverySome.class), + new FunctionDef(FnSortBy.FN_SORT_BY, FnSortBy.class), + new FunctionDef(FnPartialApply.FN_PARTIAL_APPLY, FnPartialApply.class), + new FunctionDef(FnSubsequenceMatching.FN_CONTAINS_SUBSEQUENCE[0], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_CONTAINS_SUBSEQUENCE[1], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_STARTS_WITH_SUBSEQUENCE[0], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_STARTS_WITH_SUBSEQUENCE[1], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_ENDS_WITH_SUBSEQUENCE[0], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_ENDS_WITH_SUBSEQUENCE[1], FnSubsequenceMatching.class), + // XQuery 4.0 functions — batch 2: string/number/URI + new FunctionDef(FnDecodeFromUri.FN_DECODE_FROM_URI, FnDecodeFromUri.class), + new FunctionDef(FnParseInteger.FN_PARSE_INTEGER[0], FnParseInteger.class), + new FunctionDef(FnParseInteger.FN_PARSE_INTEGER[1], FnParseInteger.class), + new FunctionDef(FnDivideDecimals.FN_DIVIDE_DECIMALS[0], FnDivideDecimals.class), + new FunctionDef(FnDivideDecimals.FN_DIVIDE_DECIMALS[1], FnDivideDecimals.class), + // XQuery 4.0 functions — batch 3: node and type + new FunctionDef(FnDistinctOrderedNodes.FN_DISTINCT_ORDERED_NODES, FnDistinctOrderedNodes.class), + new FunctionDef(FnSiblings.FN_SIBLINGS[0], FnSiblings.class), + new FunctionDef(FnSiblings.FN_SIBLINGS[1], FnSiblings.class), + new FunctionDef(FnTypeOf.FN_TYPE_OF, FnTypeOf.class), + // XQuery 4.0 functions — batch 4: date/time and misc + new FunctionDef(FnUnixDateTime.FN_UNIX_DATETIME[0], FnUnixDateTime.class), + new FunctionDef(FnUnixDateTime.FN_UNIX_DATETIME[1], FnUnixDateTime.class), + new FunctionDef(FnMessage.FN_MESSAGE[0], FnMessage.class), + new FunctionDef(FnMessage.FN_MESSAGE[1], FnMessage.class), + // XQuery 4.0 functions — batch 2 (continued): parse-QName + new FunctionDef(FnParseQName.FN_PARSE_QNAME, FnParseQName.class), + // XQuery 4.0 functions — batch 3 (continued): type annotation + new FunctionDef(FnTypeAnnotation.FN_ATOMIC_TYPE_ANNOTATION, FnTypeAnnotation.class), + new FunctionDef(FnTypeAnnotation.FN_NODE_TYPE_ANNOTATION, FnTypeAnnotation.class), + // XQuery 4.0 functions — batch 4 (continued): civil-timezone + new FunctionDef(FnCivilTimezone.FN_CIVIL_TIMEZONE[0], FnCivilTimezone.class), + new FunctionDef(FnCivilTimezone.FN_CIVIL_TIMEZONE[1], FnCivilTimezone.class), + // XQuery 4.0 functions — batch 5: CSV functions + new FunctionDef(CsvFunctions.FN_CSV_TO_ARRAYS[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_TO_ARRAYS[1], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_PARSE_CSV[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_PARSE_CSV[1], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_TO_XML[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_TO_XML[1], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_DOC[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_DOC[1], CsvFunctions.class), + // XQuery 4.0 functions — batch 6: subsequence-where, seconds, in-scope-namespaces + new FunctionDef(FnSubsequenceWhere.FN_SUBSEQUENCE_WHERE[0], FnSubsequenceWhere.class), + new FunctionDef(FnSubsequenceWhere.FN_SUBSEQUENCE_WHERE[1], FnSubsequenceWhere.class), + new FunctionDef(FnSeconds.FN_SECONDS, FnSeconds.class), + new FunctionDef(FnInScopeNamespaces.FN_IN_SCOPE_NAMESPACES, FnInScopeNamespaces.class), + // XQuery 4.0 functions — batch 7: transitive-closure, element-to-map + new FunctionDef(FnTransitiveClosure.FN_TRANSITIVE_CLOSURE, FnTransitiveClosure.class), + new FunctionDef(FnElementToMap.FN_ELEMENT_TO_MAP[0], FnElementToMap.class), + new FunctionDef(FnElementToMap.FN_ELEMENT_TO_MAP[1], FnElementToMap.class), + + // --- Invisible XML (feature/fn-invisible-xml) --- + new FunctionDef(FnInvisibleXml.SIGNATURES[0], FnInvisibleXml.class), + new FunctionDef(FnInvisibleXml.SIGNATURES[1], FnInvisibleXml.class) + // --- End Invisible XML --- }; static { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnOp.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnOp.java new file mode 100644 index 00000000000..f0785604f57 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnOp.java @@ -0,0 +1,404 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AbstractExpression; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionCall; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.ValueComparison; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.ComputableValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:op (XQuery 4.0). + * + * Returns a function reference for a named operator. + */ +public class FnOp extends BasicFunction { + + private static final QName PARAM_A = new QName("a", javax.xml.XMLConstants.NULL_NS_URI); + private static final QName PARAM_B = new QName("b", javax.xml.XMLConstants.NULL_NS_URI); + + public static final FunctionSignature FN_OP = new FunctionSignature( + new QName("op", Function.BUILTIN_FUNCTION_NS), + "Returns a function that applies a given operator.", + new SequenceType[] { + new FunctionParameterSequenceType("operator", Type.STRING, Cardinality.EXACTLY_ONE, "The operator name") + }, + new FunctionReturnSequenceType(Type.FUNCTION, Cardinality.EXACTLY_ONE, "a function implementing the operator")); + + public FnOp(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + private static final ErrorCodes.ErrorCode FOAP0001 = new ErrorCodes.ErrorCode( + "FOAP0001", "Invalid operator name"); + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final String operator = args[0].getStringValue(); + + // Validate operator name + if (!isValidOperator(operator)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Unknown operator: " + operator); + } + + // Create a UserDefinedFunction with 2 parameters ($a, $b) + final FunctionSignature opSig = new FunctionSignature( + new QName("op#" + operator, Function.BUILTIN_FUNCTION_NS), + new SequenceType[] { + new FunctionParameterSequenceType("a", Type.ITEM, Cardinality.ZERO_OR_MORE, "left operand"), + new FunctionParameterSequenceType("b", Type.ITEM, Cardinality.ZERO_OR_MORE, "right operand") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "result")); + + final UserDefinedFunction func = new UserDefinedFunction(context, opSig); + func.addVariable(PARAM_A); + func.addVariable(PARAM_B); + + // Set the body to an expression that evaluates the operator + func.setFunctionBody(new OperatorExpression(context, operator)); + + final FunctionCall call = new FunctionCall(context, func); + call.setLocation(getLine(), getColumn()); + + return new FunctionReference(this, call); + } + + private boolean isValidOperator(final String op) { + switch (op) { + case ",": case "and": case "or": + case "+": case "-": case "*": case "div": case "idiv": case "mod": + case "=": case "<": case "<=": case ">": case ">=": case "!=": + case "eq": case "lt": case "le": case "gt": case "ge": case "ne": + case "<<": case ">>": case "precedes": case "follows": + case "precedes-or-is": case "follows-or-is": + case "is": case "is-not": + case "||": case "|": case "union": case "except": case "intersect": + case "to": case "otherwise": + return true; + default: + return false; + } + } + + /** + * Expression that evaluates an operator on two variables $a and $b + * from the local variable context. + */ + private static class OperatorExpression extends AbstractExpression { + + private final String operator; + + OperatorExpression(final XQueryContext context, final String operator) { + super(context); + this.operator = operator; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence a = context.resolveVariable(PARAM_A).getValue(); + final Sequence b = context.resolveVariable(PARAM_B).getValue(); + + switch (operator) { + // Arithmetic + case "+": return arithmetic(a, b, "plus"); + case "-": return arithmetic(a, b, "minus"); + case "*": return arithmetic(a, b, "mult"); + case "div": return arithmetic(a, b, "div"); + case "idiv": return arithmetic(a, b, "idiv"); + case "mod": return arithmetic(a, b, "mod"); + + // General comparison + case "=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.EQ); + case "!=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.NEQ); + case "<": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.LT); + case "<=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.LTEQ); + case ">": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.GT); + case ">=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.GTEQ); + + // Value comparison + case "eq": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.EQ); + case "ne": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.NEQ); + case "lt": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.LT); + case "le": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.LTEQ); + case "gt": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.GT); + case "ge": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.GTEQ); + + // Boolean + case "and": return BooleanValue.valueOf(a.effectiveBooleanValue() && b.effectiveBooleanValue()); + case "or": return BooleanValue.valueOf(a.effectiveBooleanValue() || b.effectiveBooleanValue()); + + // String concatenation + case "||": return new StringValue(this, a.getStringValue() + b.getStringValue()); + + // Sequence + case ",": return opComma(a, b); + case "|": + case "union": return opVenn(a, b, "union"); + case "except": return opVenn(a, b, "except"); + case "intersect": return opVenn(a, b, "intersect"); + case "to": return opTo(a, b); + case "otherwise": return a.isEmpty() ? b : a; + + // Node comparison + case "is": return nodeIs(a, b); + case "is-not": return nodeIsNot(a, b); + case "<<": + case "precedes": return nodePrecedes(a, b); + case ">>": + case "follows": return nodeFollows(a, b); + case "precedes-or-is": return nodePrecedesOrIs(a, b); + case "follows-or-is": return nodeFollowsOrIs(a, b); + + default: + throw new XPathException(this, ErrorCodes.FOJS0005, "Unknown operator: " + operator); + } + } + + private Sequence arithmetic(final Sequence a, final Sequence b, final String op) throws XPathException { + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final ComputableValue left = toComputable(a.itemAt(0).atomize()); + final ComputableValue right = toComputable(b.itemAt(0).atomize()); + switch (op) { + case "plus": return left.plus(right); + case "minus": return left.minus(right); + case "mult": return left.mult(right); + case "div": return left.div(right); + case "idiv": return ((org.exist.xquery.value.NumericValue) left).idiv((org.exist.xquery.value.NumericValue) right); + case "mod": return ((org.exist.xquery.value.NumericValue) left).mod((org.exist.xquery.value.NumericValue) right); + default: throw new IllegalStateException(); + } + } + + private Sequence generalCompare(final Sequence a, final Sequence b, + final org.exist.xquery.Constants.Comparison comp) throws XPathException { + // General comparison: existential semantics — true if any pair matches + if (a.isEmpty() || b.isEmpty()) { + return BooleanValue.FALSE; + } + final com.ibm.icu.text.Collator collator = context.getDefaultCollator(); + for (int i = 0; i < a.getItemCount(); i++) { + final org.exist.xquery.value.AtomicValue lv = a.itemAt(i).atomize(); + for (int j = 0; j < b.getItemCount(); j++) { + final org.exist.xquery.value.AtomicValue rv = b.itemAt(j).atomize(); + if (ValueComparison.compareAtomic(collator, lv, rv, + org.exist.xquery.Constants.StringTruncationOperator.NONE, comp)) { + return BooleanValue.TRUE; + } + } + } + return BooleanValue.FALSE; + } + + private Sequence valueCompare(final Sequence a, final Sequence b, + final org.exist.xquery.Constants.Comparison comp) throws XPathException { + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + if (a.getItemCount() > 1 || b.getItemCount() > 1) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Value comparison requires singleton operands"); + } + final org.exist.xquery.value.AtomicValue lv = a.itemAt(0).atomize(); + final org.exist.xquery.value.AtomicValue rv = b.itemAt(0).atomize(); + final com.ibm.icu.text.Collator collator = context.getDefaultCollator(); + return BooleanValue.valueOf(ValueComparison.compareAtomic(collator, lv, rv, + org.exist.xquery.Constants.StringTruncationOperator.NONE, comp)); + } + + private Sequence opComma(final Sequence a, final Sequence b) throws XPathException { + final ValueSequence result = new ValueSequence(a.getItemCount() + b.getItemCount()); + result.addAll(a); + result.addAll(b); + return result; + } + + private Sequence opVenn(final Sequence a, final Sequence b, final String op) throws XPathException { + // Check that operands are nodes + for (int i = 0; i < a.getItemCount(); i++) { + if (!(a.itemAt(i) instanceof org.w3c.dom.Node)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Set operation requires node operands, got " + Type.getTypeName(a.itemAt(i).getType())); + } + } + for (int i = 0; i < b.getItemCount(); i++) { + if (!(b.itemAt(i) instanceof org.w3c.dom.Node)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Set operation requires node operands, got " + Type.getTypeName(b.itemAt(i).getType())); + } + } + try { + switch (op) { + case "union": return a.toNodeSet().union(b.toNodeSet()); + case "except": return a.toNodeSet().except(b.toNodeSet()); + case "intersect": return a.toNodeSet().intersection(b.toNodeSet()); + default: throw new IllegalStateException(); + } + } catch (final XPathException e) { + throw new XPathException(this, ErrorCodes.XPTY0004, e.getMessage()); + } + } + + private Sequence opTo(final Sequence a, final Sequence b) throws XPathException { + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final long start = ((IntegerValue) a.itemAt(0)).getLong(); + final long end = ((IntegerValue) b.itemAt(0)).getLong(); + if (start > end) { + return Sequence.EMPTY_SEQUENCE; + } + final ValueSequence result = new ValueSequence((int) (end - start + 1)); + for (long i = start; i <= end; i++) { + result.add(new IntegerValue(this, i)); + } + return result; + } + + private void checkNodeOperands(final Sequence a, final Sequence b) throws XPathException { + if (!a.isEmpty() && !(a.itemAt(0) instanceof org.w3c.dom.Node)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Node comparison requires node operands, got " + Type.getTypeName(a.itemAt(0).getType())); + } + if (!b.isEmpty() && !(b.itemAt(0) instanceof org.w3c.dom.Node)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Node comparison requires node operands, got " + Type.getTypeName(b.itemAt(0).getType())); + } + } + + private Sequence nodeIs(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + return BooleanValue.valueOf(a.itemAt(0).equals(b.itemAt(0))); + } + + private Sequence nodeIsNot(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + return BooleanValue.valueOf(!a.itemAt(0).equals(b.itemAt(0))); + } + + private ComputableValue toComputable(final org.exist.xquery.value.AtomicValue v) throws XPathException { + if (v instanceof ComputableValue) { + return (ComputableValue) v; + } + // Untyped atomic → promote to xs:double for arithmetic + if (v.getType() == Type.UNTYPED_ATOMIC) { + return (ComputableValue) v.convertTo(Type.DOUBLE); + } + throw new XPathException(this, ErrorCodes.XPTY0004, + "Cannot use " + Type.getTypeName(v.getType()) + " in arithmetic"); + } + + private int nodeCompare(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + final Item left = a.itemAt(0); + final Item right = b.itemAt(0); + if (left instanceof org.exist.dom.persistent.NodeProxy && right instanceof org.exist.dom.persistent.NodeProxy) { + return ((org.exist.dom.persistent.NodeProxy) left).compareTo((org.exist.dom.persistent.NodeProxy) right); + } + // For in-memory nodes, compare using NodeId if available + if (left instanceof org.exist.dom.memtree.NodeImpl && right instanceof org.exist.dom.memtree.NodeImpl) { + final org.exist.dom.memtree.NodeImpl leftNode = (org.exist.dom.memtree.NodeImpl) left; + final org.exist.dom.memtree.NodeImpl rightNode = (org.exist.dom.memtree.NodeImpl) right; + return Integer.compare(leftNode.getNodeNumber(), rightNode.getNodeNumber()); + } + throw new XPathException(this, ErrorCodes.XPTY0004, + "Node comparison requires node operands"); + } + + private Sequence nodePrecedes(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + return BooleanValue.valueOf(nodeCompare(a, b) < 0); + } + + private Sequence nodeFollows(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + return BooleanValue.valueOf(nodeCompare(a, b) > 0); + } + + private Sequence nodePrecedesOrIs(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + return BooleanValue.valueOf(nodeCompare(a, b) <= 0); + } + + private Sequence nodeFollowsOrIs(final Sequence a, final Sequence b) throws XPathException { + checkNodeOperands(a, b); + if (a.isEmpty() || b.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + return BooleanValue.valueOf(nodeCompare(a, b) >= 0); + } + + @Override + public int returnsType() { + return Type.ITEM; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + // nothing to analyze + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("op(\"" + operator + "\")"); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseHtml.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseHtml.java new file mode 100644 index 00000000000..f4fee93eb6f --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseHtml.java @@ -0,0 +1,240 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.evolvedbinary.j8fu.Either; +import org.exist.Namespaces; +import org.exist.dom.QName; +import org.exist.dom.memtree.SAXAdapter; +import org.exist.util.HtmlToXmlParser; +import org.exist.validation.ValidationReport; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.XMLFilterImpl; + +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.util.Optional; + +/** + * Implements fn:parse-html (XQuery 4.0). + * + * Parses an HTML string (which may be malformed) into an XDM document node + * with all elements in the XHTML namespace. + */ +public class FnParseHtml extends BasicFunction { + + public static final FunctionSignature[] FN_PARSE_HTML = { + new FunctionSignature( + new QName("parse-html", Function.BUILTIN_FUNCTION_NS), + "Parses the supplied HTML string into an XDM document node. " + + "The input need not be well-formed; it is processed by an HTML parser " + + "that corrects errors and produces well-formed XHTML output.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, + Cardinality.ZERO_OR_ONE, "The HTML to parse (string or binary)") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, + "The parsed XHTML document")), + new FunctionSignature( + new QName("parse-html", Function.BUILTIN_FUNCTION_NS), + "Parses the supplied HTML string into an XDM document node with options. " + + "The input need not be well-formed; it is processed by an HTML parser " + + "that corrects errors and produces well-formed XHTML output.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, + Cardinality.ZERO_OR_ONE, "The HTML to parse (string or binary)"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, + Cardinality.EXACTLY_ONE, "Options map") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, + "The parsed XHTML document")) + }; + + public FnParseHtml(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Extract options if present + boolean failOnError = false; + String encoding = "UTF-8"; + if (getArgumentCount() == 2 && !args[1].isEmpty()) { + final MapType options = (MapType) args[1].itemAt(0); + failOnError = getBooleanOption(options, "fail-on-error", false); + encoding = getStringOption(options, "encoding", "UTF-8"); + + // Validate option types per spec — unknown options with wrong types raise XPTY0004 + validateOptionType(options, "include-template-content"); + validateOptionType(options, "exclude-template-content"); + } + + // Get the HTML content as a string + final String htmlContent = getHtmlContent(args[0].itemAt(0), encoding); + + // Parse with the configured HTML-to-XML parser + return parseHtml(htmlContent, failOnError); + } + + private String getHtmlContent(final Item item, final String encoding) throws XPathException { + if (item instanceof BinaryValue) { + final BinaryValue binary = (BinaryValue) item; + try (final java.io.InputStream is = binary.getInputStream()) { + final Charset charset = Charset.forName(encoding); + return new String(is.readAllBytes(), charset); + } catch (final Exception e) { + throw new XPathException(this, ErrorCodes.FODC0006, + "Error decoding binary value: " + e.getMessage()); + } + } + return item.getStringValue(); + } + + private Sequence parseHtml(final String htmlContent, final boolean failOnError) throws XPathException { + final ValidationReport report = new ValidationReport(); + final SAXAdapter adapter = new SAXAdapter(this, context); + + try { + final Optional> maybeReaderInst = + HtmlToXmlParser.getHtmlToXmlParser(context.getBroker().getConfiguration()); + + if (maybeReaderInst.isEmpty()) { + throw new XPathException(this, ErrorCodes.FODC0006, + "No HTML parser configured in conf.xml"); + } + + final Either readerInst = maybeReaderInst.get(); + if (readerInst.isLeft()) { + throw new XPathException(this, ErrorCodes.FODC0006, + "Unable to instantiate HTML parser: " + readerInst.left().get().getMessage()); + } + + final XMLReader xr = readerInst.right().get(); + + // Configure for XHTML namespace output + try { + xr.setFeature("http://cyberneko.org/html/features/insert-namespaces", true); + } catch (final SAXException e) { + // Feature not supported by this parser — XHTML namespace may be missing + } + + // Configure lowercase element names for XHTML compliance + try { + xr.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); + } catch (final SAXException e) { + // Property not supported + } + try { + xr.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower"); + } catch (final SAXException e) { + // Property not supported + } + + // Use a SAX filter to ensure ALL elements are in XHTML namespace + final XMLFilterImpl xhtmlFilter = new XMLFilterImpl(xr) { + private static final String XHTML_NS = "http://www.w3.org/1999/xhtml"; + @Override + public void startElement(String uri, String localName, String qName, Attributes atts) + throws SAXException { + if (uri == null || uri.isEmpty()) { + uri = XHTML_NS; + } + super.startElement(uri, localName.isEmpty() ? qName : localName, qName, atts); + } + @Override + public void endElement(String uri, String localName, String qName) throws SAXException { + if (uri == null || uri.isEmpty()) { + uri = XHTML_NS; + } + super.endElement(uri, localName.isEmpty() ? qName : localName, qName); + } + }; + + xhtmlFilter.setErrorHandler(report); + xhtmlFilter.setContentHandler(adapter); + xr.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter); + + final InputSource src = new InputSource(new StringReader(htmlContent)); + xhtmlFilter.parse(src); + + } catch (final SAXException e) { + if (failOnError) { + throw new XPathException(this, ErrorCodes.FODC0011, + "HTML parsing error: " + e.getMessage()); + } + // Non-fatal: return whatever was parsed + } catch (final IOException e) { + throw new XPathException(this, ErrorCodes.FODC0006, + "Error reading HTML input: " + e.getMessage()); + } + + if (!report.isValid() && failOnError) { + throw new XPathException(this, ErrorCodes.FODC0011, + "HTML parsing error: " + report.toString()); + } + + return adapter.getDocument(); + } + + private boolean getBooleanOption(final MapType options, final String key, + final boolean defaultValue) throws XPathException { + final Sequence value = options.get(new StringValue(key)); + if (value != null && !value.isEmpty()) { + return value.itemAt(0).convertTo(Type.BOOLEAN).effectiveBooleanValue(); + } + return defaultValue; + } + + private String getStringOption(final MapType options, final String key, + final String defaultValue) throws XPathException { + final Sequence value = options.get(new StringValue(key)); + if (value != null && !value.isEmpty()) { + final Item item = value.itemAt(0); + if (!(item instanceof StringValue)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Option '" + key + "' must be a string, got " + Type.getTypeName(item.getType())); + } + return item.getStringValue(); + } + return defaultValue; + } + + private void validateOptionType(final MapType options, final String key) throws XPathException { + final Sequence value = options.get(new StringValue(key)); + if (value != null && !value.isEmpty()) { + // These options are not supported — raise XPTY0004 per spec + throw new XPathException(this, ErrorCodes.XPTY0004, + "Option '" + key + "' is not supported"); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseInteger.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseInteger.java new file mode 100644 index 00000000000..6031d9d161a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseInteger.java @@ -0,0 +1,142 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import java.math.BigInteger; + +/** + * Implements XQuery 4.0 fn:parse-integer. + * + * fn:parse-integer($value, $radix?) parses a string as an integer in the given radix (2-36). + */ +public class FnParseInteger extends BasicFunction { + + private static final ErrorCodes.ErrorCode FORG0011 = new ErrorCodes.ErrorCode("FORG0011", + "Radix is out of range (must be 2-36)"); + private static final ErrorCodes.ErrorCode FORG0012 = new ErrorCodes.ErrorCode("FORG0012", + "Invalid integer string for the given radix"); + + public static final FunctionSignature[] FN_PARSE_INTEGER = { + new FunctionSignature( + new QName("parse-integer", Function.BUILTIN_FUNCTION_NS), + "Parses a string as an integer in the given radix.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to parse"), + new FunctionParameterSequenceType("radix", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The radix (2-36), default 10") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_ONE, "the parsed integer")), + new FunctionSignature( + new QName("parse-integer", Function.BUILTIN_FUNCTION_NS), + "Parses a string as a decimal integer.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to parse") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_ONE, "the parsed integer")) + }; + + public FnParseInteger(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String value = args[0].getStringValue(); + + int radix = 10; + if (args.length > 1 && !args[1].isEmpty()) { + radix = (int) ((IntegerValue) args[1].itemAt(0)).getLong(); + } + + if (radix < 2 || radix > 36) { + throw new XPathException(this, FORG0011, "Radix must be between 2 and 36, got: " + radix); + } + + // Preprocess: strip whitespace and underscores + String stripped = value.replaceAll("[\\s_]", ""); + + if (stripped.isEmpty()) { + throw new XPathException(this, FORG0012, "Empty string after stripping whitespace and underscores"); + } + + // Handle optional sign + boolean negative = false; + if (stripped.charAt(0) == '-') { + negative = true; + stripped = stripped.substring(1); + } else if (stripped.charAt(0) == '+') { + stripped = stripped.substring(1); + } + + if (stripped.isEmpty()) { + throw new XPathException(this, FORG0012, "No digits found after sign"); + } + + // Validate digits for the given radix + final String lowerStripped = stripped.toLowerCase(); + for (int i = 0; i < lowerStripped.length(); i++) { + final char c = lowerStripped.charAt(i); + final int digit; + if (c >= '0' && c <= '9') { + digit = c - '0'; + } else if (c >= 'a' && c <= 'z') { + digit = c - 'a' + 10; + } else { + throw new XPathException(this, FORG0012, + "Invalid character '" + c + "' for radix " + radix); + } + if (digit >= radix) { + throw new XPathException(this, FORG0012, + "Invalid character '" + c + "' for radix " + radix); + } + } + + try { + BigInteger result = new BigInteger(lowerStripped, radix); + if (negative) { + result = result.negate(); + } + return new IntegerValue(this, result); + } catch (final NumberFormatException e) { + throw new XPathException(this, FORG0012, + "Cannot parse '" + value + "' as integer with radix " + radix); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseQName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseQName.java new file mode 100644 index 00000000000..f44f4df396c --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseQName.java @@ -0,0 +1,174 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.QNameValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import javax.xml.XMLConstants; + +/** + * Implements XQuery 4.0 fn:parse-QName. + * + * fn:parse-QName($value as xs:string?) as xs:QName? + * + * Parses an EQName string to xs:QName. Supports: + * - NCName (no namespace) + * - prefix:local (resolved from static context) + * - Q{uri}local (URIQualifiedName) + */ +public class FnParseQName extends BasicFunction { + + private static final ErrorCodes.ErrorCode FOCA0002 = new ErrorCodes.ErrorCode("FOCA0002", + "Invalid lexical form for xs:QName"); + + public static final FunctionSignature FN_PARSE_QNAME = new FunctionSignature( + new QName("parse-QName", Function.BUILTIN_FUNCTION_NS), + "Parses an EQName string to xs:QName.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The EQName string to parse") + }, + new FunctionReturnSequenceType(Type.QNAME, Cardinality.ZERO_OR_ONE, "the parsed QName")); + + public FnParseQName(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String value = args[0].getStringValue().strip(); + + if (value.isEmpty()) { + throw new XPathException(this, FOCA0002, "Empty string is not a valid EQName"); + } + + // URIQualifiedName: Q{uri}local + if (value.startsWith("Q{")) { + return parseURIQualifiedName(value); + } + + // Prefixed QName: prefix:local + final int colon = value.indexOf(':'); + if (colon > 0) { + final String prefix = value.substring(0, colon); + final String local = value.substring(colon + 1); + + if (!isNCName(prefix) || !isNCName(local)) { + throw new XPathException(this, FOCA0002, + "Invalid prefixed QName: " + value); + } + + final String uri = context.getURIForPrefix(prefix); + if (uri == null || uri.isEmpty()) { + throw new XPathException(this, ErrorCodes.FONS0004, + "Undeclared prefix: " + prefix); + } + + return new QNameValue(this, context, new QName(local, uri, prefix)); + } + + // NCName (no namespace) + if (!isNCName(value)) { + throw new XPathException(this, FOCA0002, + "Invalid NCName: " + value); + } + + return new QNameValue(this, context, new QName(value, XMLConstants.NULL_NS_URI)); + } + + private Sequence parseURIQualifiedName(final String value) throws XPathException { + final int closeBrace = value.indexOf('}'); + if (closeBrace < 0) { + throw new XPathException(this, FOCA0002, + "Missing closing '}' in URIQualifiedName: " + value); + } + + final String uri = value.substring(2, closeBrace); + final String rest = value.substring(closeBrace + 1); + + if (rest.isEmpty()) { + throw new XPathException(this, FOCA0002, + "Missing local name after Q{...}: " + value); + } + + // rest may be prefix:local or just local + final int colon = rest.indexOf(':'); + final String local; + final String prefix; + if (colon > 0) { + prefix = rest.substring(0, colon); + local = rest.substring(colon + 1); + } else { + prefix = XMLConstants.DEFAULT_NS_PREFIX; + local = rest; + } + + if (!isNCName(local) || (colon > 0 && !isNCName(prefix))) { + throw new XPathException(this, FOCA0002, + "Invalid URIQualifiedName: " + value); + } + + return new QNameValue(this, context, new QName(local, uri, prefix)); + } + + private static boolean isNCName(final String s) { + if (s == null || s.isEmpty()) { + return false; + } + if (!isNCNameStart(s.charAt(0))) { + return false; + } + for (int i = 1; i < s.length(); i++) { + if (!isNCNameChar(s.charAt(i))) { + return false; + } + } + return true; + } + + private static boolean isNCNameStart(final char c) { + return Character.isLetter(c) || c == '_'; + } + + private static boolean isNCNameChar(final char c) { + return Character.isLetterOrDigit(c) || c == '.' || c == '-' || c == '_' + || c == '\u00B7' + || (c >= '\u0300' && c <= '\u036F') + || (c >= '\u203F' && c <= '\u2040'); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseUri.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseUri.java new file mode 100644 index 00000000000..5af1d9e7466 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseUri.java @@ -0,0 +1,462 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:parse-uri (XQuery 4.0). + * + * Parses a URI string and returns a map of its constituent parts, + * following the algorithm specified in XPath Functions 4.0. + */ +public class FnParseUri extends BasicFunction { + + private static final ErrorCodes.ErrorCode FOUR0001 = new ErrorCodes.ErrorCode( + "FOUR0001", "Invalid URI"); + + private static final Pattern SCHEME_PATTERN = + Pattern.compile("^([a-zA-Z][A-Za-z0-9+\\-.]+):(.*)$"); + private static final Pattern FRAGMENT_PATTERN = + Pattern.compile("^(.*?)#(.*)$"); + private static final Pattern QUERY_PATTERN = + Pattern.compile("^(.*?)\\?(.*)$"); + private static final Pattern DRIVE_LETTER_PATTERN = + Pattern.compile("^/*([a-zA-Z][:|].*)$"); + private static final Pattern AUTHORITY_PATH_PATTERN = + Pattern.compile("^//([^/]*)(/.*)$"); + private static final Pattern AUTHORITY_ONLY_PATTERN = + Pattern.compile("^//([^/]+)$"); + + // Authority parsing patterns from the spec + private static final Pattern AUTH_IPV6_PATTERN = + Pattern.compile("^(([^@]*)@)?(\\[[^\\]]*\\])(:([^:]*))?$"); + private static final Pattern AUTH_IPV6_OPEN_PATTERN = + Pattern.compile("^(([^@]*)@)?\\[.*$"); + private static final Pattern AUTH_NORMAL_PATTERN = + Pattern.compile("^(([^@]*)@)?([^:]+)(:([^:]*))?$"); + + private static final Set NON_HIERARCHICAL_SCHEMES = new HashSet<>(Arrays.asList( + "mailto", "news", "urn", "tel", "tag", "jar", "data", "javascript", "cid", "mid" + )); + private static final Set HIERARCHICAL_SCHEMES = new HashSet<>(Arrays.asList( + "http", "https", "ftp", "ftps", "sftp", "file", "ssh", "telnet", + "ldap", "ldaps", "svn", "svn+ssh", "git", "s3", "hdfs" + )); + + public static final FunctionSignature[] FN_PARSE_URI = { + new FunctionSignature( + new QName("parse-uri", Function.BUILTIN_FUNCTION_NS), + "Parses a URI and returns a map of its components.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, + Cardinality.ZERO_OR_ONE, "The URI to parse") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, + Cardinality.ZERO_OR_ONE, "map of URI components")), + new FunctionSignature( + new QName("parse-uri", Function.BUILTIN_FUNCTION_NS), + "Parses a URI and returns a map of its components.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, + Cardinality.ZERO_OR_ONE, "The URI to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, + Cardinality.ZERO_OR_ONE, "Options map") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, + Cardinality.ZERO_OR_ONE, "map of URI components")) + }; + + public FnParseUri(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String originalUri = args[0].getStringValue(); + if (originalUri.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Parse options + boolean allowDeprecated = false; + boolean omitDefaultPorts = false; + boolean uncPath = false; + if (args.length > 1 && !args[1].isEmpty()) { + final MapType options = (MapType) args[1].itemAt(0); + allowDeprecated = getBooleanOption(options, "allow-deprecated-features", false); + omitDefaultPorts = getBooleanOption(options, "omit-default-ports", false); + uncPath = getBooleanOption(options, "unc-path", false); + } + + // Step 1: Replace backslashes with forward slashes + String str = originalUri.replace('\\', '/'); + + // Step 2: Strip fragment + String fragment = null; + Matcher m = FRAGMENT_PATTERN.matcher(str); + if (m.matches()) { + str = m.group(1); + fragment = m.group(2); + if (fragment.isEmpty()) { + fragment = null; + } else { + fragment = uriDecode(fragment); + } + } + + // Step 3: Strip query + String query = null; + m = QUERY_PATTERN.matcher(str); + if (m.matches()) { + str = m.group(1); + query = m.group(2); + if (query.isEmpty()) { + query = null; + } + } + + // Step 4: Identify scheme + String scheme = null; + m = SCHEME_PATTERN.matcher(str); + if (m.matches()) { + scheme = m.group(1); + str = m.group(2); + } + + // Step 5: absolute flag — scheme present and no fragment + Boolean absolute = (scheme != null && fragment == null) ? Boolean.TRUE : null; + + // Step 6: Handle file: and drive letters + String filepath = null; + if (scheme == null || "file".equalsIgnoreCase(scheme)) { + m = DRIVE_LETTER_PATTERN.matcher(str); + if (m.matches()) { + scheme = "file"; + String matched = m.group(1); + // Replace | with : if necessary + if (matched.length() > 1 && matched.charAt(1) == '|') { + matched = matched.charAt(0) + ":" + matched.substring(2); + } + str = "/" + matched; + } else if (uncPath && scheme == null) { + scheme = "file"; + } + } + + // Step 7: Determine hierarchical + Boolean hierarchical = null; + if (scheme != null) { + final String schemeLower = scheme.toLowerCase(); + if (HIERARCHICAL_SCHEMES.contains(schemeLower)) { + hierarchical = Boolean.TRUE; + } else if (NON_HIERARCHICAL_SCHEMES.contains(schemeLower)) { + hierarchical = Boolean.FALSE; + } else if (str.isEmpty()) { + hierarchical = null; + } else { + hierarchical = str.startsWith("/"); + } + } else { + // No scheme — hierarchical if starts with / + if (!str.isEmpty()) { + hierarchical = str.startsWith("/"); + } + } + + // Non-hierarchical → absolute is not applicable + if (hierarchical != null && !hierarchical) { + absolute = null; + } + + // Step 8: Handle file: scheme filepath + if ("file".equalsIgnoreCase(scheme)) { + if (uncPath) { + // UNC path handling + final Pattern uncPattern = Pattern.compile("^/*(//[^/].*)$"); + m = uncPattern.matcher(str); + if (m.matches()) { + filepath = m.group(1); + str = filepath; + } + } + if (filepath == null) { + // Check for //X:/ pattern (multiple leading slashes before drive) + if (str.matches("^//*[A-Za-z]:/.*$")) { + // Remove all but one leading slash + str = str.replaceFirst("^/+", "/"); + filepath = str.replaceFirst("^/", ""); + } else { + // Replace multiple leading slashes with single slash + str = str.replaceFirst("^/+", "/"); + filepath = str; + } + } + } + + // Step 9: Extract authority (hierarchical URIs only, NOT file: scheme) + String authority = null; + if (hierarchical != null && hierarchical + && !"file".equalsIgnoreCase(scheme)) { + m = AUTHORITY_ONLY_PATTERN.matcher(str); + if (m.matches()) { + authority = m.group(1); + str = ""; + } else { + m = AUTHORITY_PATH_PATTERN.matcher(str); + if (m.matches()) { + authority = m.group(1); + str = m.group(2); + } + } + // Treat empty authority as absent + if (authority != null && authority.isEmpty()) { + authority = null; + } + } + + // Step 10: Parse authority into userinfo, host, port + String userinfo = null; + String host = null; + Integer port = null; + if (authority != null && !authority.isEmpty()) { + // Parse userinfo + final int atIdx = authority.indexOf('@'); + String authRemainder = authority; + if (atIdx >= 0) { + userinfo = authority.substring(0, atIdx); + authRemainder = authority.substring(atIdx + 1); + // Check for deprecated password + if (!allowDeprecated && userinfo.contains(":")) { + final String password = userinfo.substring(userinfo.indexOf(':') + 1); + if (!password.isEmpty()) { + userinfo = null; + } + } + } + + // Parse host and port from authRemainder + m = AUTH_IPV6_PATTERN.matcher(authority); + if (m.matches()) { + host = m.group(3); + final String portStr = m.group(5); + if (portStr != null && !portStr.isEmpty()) { + try { + port = Integer.parseInt(portStr); + } catch (final NumberFormatException ignored) { + } + } + } else { + m = AUTH_IPV6_OPEN_PATTERN.matcher(authority); + if (m.matches()) { + throw new XPathException(this, FOUR0001, + "Unmatched '[' in URI authority: " + authority); + } + m = AUTH_NORMAL_PATTERN.matcher(authority); + if (m.matches()) { + host = m.group(3); + final String portStr = m.group(5); + if (portStr != null && !portStr.isEmpty()) { + try { + port = Integer.parseInt(portStr); + } catch (final NumberFormatException ignored) { + } + } + } + } + + // Omit default ports + if (omitDefaultPorts && port != null && scheme != null) { + if (isDefaultPort(scheme.toLowerCase(), port)) { + port = null; + } + } + } + + // Step 11: Determine path and filepath + final String path = str.isEmpty() ? null : str; + if (scheme == null && filepath == null && path != null) { + filepath = path; + } + // URI-decode filepath + if (filepath != null) { + filepath = uriDecode(filepath); + } + + // Step 12: Build path-segments + List pathSegments = null; + if (path != null) { + final String[] parts = str.split("/", -1); + pathSegments = new ArrayList<>(parts.length); + for (final String part : parts) { + pathSegments.add(uriDecode(part)); + } + } + + // Step 13: Parse query parameters + MapType queryParams = null; + if (query != null && !query.isEmpty()) { + queryParams = new MapType(this, context); + for (final String param : query.split("&")) { + final int eq = param.indexOf('='); + final String key; + final String value; + if (eq >= 0) { + key = uriDecode(param.substring(0, eq)); + value = uriDecode(param.substring(eq + 1)); + } else { + key = ""; + value = uriDecode(param); + } + final AtomicValue keyVal = new StringValue(this, key); + final Sequence existing = queryParams.get(keyVal); + if (existing != null && !existing.isEmpty()) { + final ValueSequence combined = new ValueSequence(); + combined.addAll(existing); + combined.add(new StringValue(this, value)); + queryParams.add(keyVal, combined); + } else { + queryParams.add(keyVal, new StringValue(this, value)); + } + } + } + + // Build result map — omit keys with empty values per spec + final MapType result = new MapType(this, context); + + // uri (the original input, always present) + result.add(new StringValue(this, "uri"), new StringValue(this, originalUri)); + + if (scheme != null) { + result.add(new StringValue(this, "scheme"), new StringValue(this, scheme)); + } + if (hierarchical != null) { + result.add(new StringValue(this, "hierarchical"), BooleanValue.valueOf(hierarchical)); + } + if (absolute != null) { + result.add(new StringValue(this, "absolute"), BooleanValue.valueOf(absolute)); + } + if (authority != null) { + result.add(new StringValue(this, "authority"), new StringValue(this, authority)); + } + if (userinfo != null) { + result.add(new StringValue(this, "userinfo"), new StringValue(this, userinfo)); + } + if (host != null) { + result.add(new StringValue(this, "host"), new StringValue(this, host)); + } + if (port != null) { + result.add(new StringValue(this, "port"), new IntegerValue(this, port)); + } + if (path != null) { + result.add(new StringValue(this, "path"), new StringValue(this, path)); + } + if (filepath != null) { + result.add(new StringValue(this, "filepath"), new StringValue(this, filepath)); + } + if (pathSegments != null) { + final ValueSequence segSeq = new ValueSequence(pathSegments.size()); + for (final String seg : pathSegments) { + segSeq.add(new StringValue(this, seg)); + } + result.add(new StringValue(this, "path-segments"), segSeq); + } + if (query != null) { + result.add(new StringValue(this, "query"), new StringValue(this, query)); + } + if (queryParams != null) { + result.add(new StringValue(this, "query-parameters"), queryParams); + } else if (query != null) { + result.add(new StringValue(this, "query-parameters"), new MapType(this, context)); + } + if (fragment != null) { + result.add(new StringValue(this, "fragment"), new StringValue(this, fragment)); + } + + return result; + } + + private boolean getBooleanOption(final MapType options, final String key, + final boolean defaultValue) throws XPathException { + final Sequence val = options.get(new StringValue(this, key)); + if (val != null && !val.isEmpty()) { + return val.effectiveBooleanValue(); + } + return defaultValue; + } + + private static boolean isDefaultPort(final String scheme, final int port) { + switch (scheme) { + case "http": return port == 80; + case "https": return port == 443; + case "ftp": return port == 21; + case "ssh": return port == 22; + default: return false; + } + } + + private static String uriDecode(final String s) { + if (s == null || s.isEmpty()) { + return s; + } + if (s.indexOf('%') < 0 && s.indexOf('+') < 0) { + return s; + } + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (final UnsupportedEncodingException | IllegalArgumentException e) { + return s; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnPartialApply.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnPartialApply.java new file mode 100644 index 00000000000..b810ebb87fd --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnPartialApply.java @@ -0,0 +1,188 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AbstractExpression; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionCall; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import org.exist.xquery.functions.map.AbstractMapType; + +import javax.xml.XMLConstants; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * Implements XQuery 4.0 fn:partial-apply. + * + * fn:partial-apply($function, $arguments) binds selected arguments to a function, + * returning a partially applied function with reduced arity. + */ +public class FnPartialApply extends BasicFunction { + + public static final FunctionSignature FN_PARTIAL_APPLY = new FunctionSignature( + new QName("partial-apply", Function.BUILTIN_FUNCTION_NS), + "Returns a partially applied function with specified arguments bound.", + new SequenceType[] { + new FunctionParameterSequenceType("function", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The function to partially apply"), + new FunctionParameterSequenceType("arguments", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Map from argument positions (xs:positiveInteger) to values") + }, + new FunctionReturnSequenceType(Type.FUNCTION, Cardinality.EXACTLY_ONE, "the partially applied function")); + + private AnalyzeContextInfo cachedContextInfo; + + public FnPartialApply(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final FunctionReference ref = (FunctionReference) args[0].itemAt(0); + final AbstractMapType argMap = (AbstractMapType) args[1].itemAt(0); + + ref.analyze(cachedContextInfo); + final int originalArity = ref.getSignature().getArgumentCount(); + + // Extract bound arguments from the map (1-based positions) + final Map boundArgs = new TreeMap<>(); + final Sequence keys = argMap.keys(); + for (final SequenceIterator ki = keys.iterate(); ki.hasNext(); ) { + final AtomicValue key = ki.nextItem().atomize(); + final int pos = (int) ((IntegerValue) key).getLong(); + if (pos >= 1 && pos <= originalArity) { + boundArgs.put(pos, argMap.get(key)); + } + } + + if (boundArgs.isEmpty()) { + return ref; + } + + // Build parameter list for the new function (unbound positions only) + final int newArity = originalArity - boundArgs.size(); + final SequenceType[] newParamTypes = new SequenceType[newArity]; + final List variables = new ArrayList<>(); + + int paramIdx = 0; + for (int pos = 1; pos <= originalArity; pos++) { + if (!boundArgs.containsKey(pos)) { + final QName varName = new QName("pa" + paramIdx, XMLConstants.NULL_NS_URI); + variables.add(varName); + newParamTypes[paramIdx] = new FunctionParameterSequenceType( + "pa" + paramIdx, Type.ITEM, Cardinality.ZERO_OR_MORE, "unbound parameter"); + paramIdx++; + } + } + + final QName name = new QName("partial" + hashCode(), XMLConstants.NULL_NS_URI); + final FunctionSignature newSignature = new FunctionSignature(name, newParamTypes, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "result")); + final UserDefinedFunction func = new UserDefinedFunction(context, newSignature); + + for (final QName varName : variables) { + func.addVariable(varName); + } + + // Body expression: resolves variables and bound args, calls the original function + func.setFunctionBody(new PartialCallExpression(context, ref, boundArgs, originalArity, variables)); + + final FunctionCall newCall = new FunctionCall(context, func); + newCall.setLocation(getLine(), getColumn()); + return new FunctionReference(this, newCall); + } + + /** + * Expression that invokes the original function with bound + unbound args assembled. + */ + private static class PartialCallExpression extends AbstractExpression { + private final FunctionReference originalRef; + private final Map boundArgs; + private final int originalArity; + private final List unboundVars; + + PartialCallExpression(final XQueryContext context, final FunctionReference ref, + final Map boundArgs, final int originalArity, + final List unboundVars) { + super(context); + this.originalRef = ref; + this.boundArgs = boundArgs; + this.originalArity = originalArity; + this.unboundVars = unboundVars; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence[] fullArgs = new Sequence[originalArity]; + int unboundIdx = 0; + for (int pos = 1; pos <= originalArity; pos++) { + if (boundArgs.containsKey(pos)) { + fullArgs[pos - 1] = boundArgs.get(pos); + } else { + fullArgs[pos - 1] = context.resolveVariable(unboundVars.get(unboundIdx)).getValue(); + unboundIdx++; + } + } + return originalRef.evalFunction(null, null, fullArgs); + } + + @Override + public int returnsType() { + return Type.ITEM; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("partial-apply(...)"); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnPartition.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnPartition.java new file mode 100644 index 00000000000..872edef1be4 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnPartition.java @@ -0,0 +1,140 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import java.util.ArrayList; +import java.util.List; + +import org.exist.xquery.functions.array.ArrayType; + +/** + * Implements fn:partition (XQuery 4.0). + * + * Splits a sequence into partitions based on a predicate function. + * The predicate receives (current-partition, next-item, position) and + * returns true to start a new partition. + */ +public class FnPartition extends BasicFunction { + + public static final FunctionSignature FN_PARTITION = new FunctionSignature( + new QName("partition", Function.BUILTIN_FUNCTION_NS), + "Splits a sequence into partitions based on a predicate function.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("split-when", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "Predicate: fn(current-partition, next-item, position) as xs:boolean?") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "sequence of arrays, each containing a partition")); + + private AnalyzeContextInfo cachedContextInfo; + + public FnPartition(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + try (final FunctionReference splitWhen = (FunctionReference) args[1].itemAt(0)) { + splitWhen.analyze(cachedContextInfo); + final int arity = splitWhen.getSignature().getArgumentCount(); + + final List partitions = new ArrayList<>(); + ValueSequence currentPartition = new ValueSequence(); + + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + + if (pos == 1) { + // First item always starts the first partition + currentPartition.add(item); + } else { + // Call predicate to decide if we should split + final Sequence splitResult; + if (arity == 1) { + splitResult = splitWhen.evalFunction(null, null, + new Sequence[]{currentPartition}); + } else if (arity == 2) { + splitResult = splitWhen.evalFunction(null, null, + new Sequence[]{currentPartition, item.toSequence()}); + } else { + splitResult = splitWhen.evalFunction(null, null, + new Sequence[]{currentPartition, item.toSequence(), new IntegerValue(this, pos)}); + } + + final boolean split = !splitResult.isEmpty() && splitResult.effectiveBooleanValue(); + if (split) { + partitions.add(currentPartition); + currentPartition = new ValueSequence(); + } + currentPartition.add(item); + } + } + + // Add the last partition + if (!currentPartition.isEmpty()) { + partitions.add(currentPartition); + } + + // Convert to sequence of arrays — each partition item becomes an array member + final ValueSequence result = new ValueSequence(partitions.size()); + for (final ValueSequence partition : partitions) { + final List members = new ArrayList<>(partition.getItemCount()); + for (final SequenceIterator pi = partition.iterate(); pi.hasNext(); ) { + members.add(pi.nextItem().toSequence()); + } + result.add(new ArrayType(this, context, members)); + } + return result; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnReplicate.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnReplicate.java new file mode 100644 index 00000000000..28053ad84b5 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnReplicate.java @@ -0,0 +1,77 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:replicate (XQuery 4.0). + * + * Produces multiple copies of a sequence. + */ +public class FnReplicate extends BasicFunction { + + public static final FunctionSignature FN_REPLICATE = new FunctionSignature( + new QName("replicate", Function.BUILTIN_FUNCTION_NS), + "Produces multiple copies of a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to replicate"), + new FunctionParameterSequenceType("count", Type.INTEGER, Cardinality.EXACTLY_ONE, "The number of copies") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the replicated sequence")); + + public FnReplicate(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final long count = ((IntegerValue) args[1].itemAt(0)).getLong(); + if (count < 0) { + throw new XPathException(this, ErrorCodes.XPTY0004, "The count argument to fn:replicate must be non-negative, got: " + count); + } + if (count == 0 || input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final int inputSize = input.getItemCount(); + final ValueSequence result = new ValueSequence((int) Math.min(count * inputSize, Integer.MAX_VALUE)); + for (long c = 0; c < count; c++) { + result.addAll(input); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSeconds.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSeconds.java new file mode 100644 index 00000000000..3f953b98757 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSeconds.java @@ -0,0 +1,64 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +/** + * Implements XQuery 4.0 fn:seconds. + * + * Converts a number of seconds (as xs:double) to an xs:dayTimeDuration. + */ +public class FnSeconds extends BasicFunction { + + public static final FunctionSignature FN_SECONDS = new FunctionSignature( + new QName("seconds", Function.BUILTIN_FUNCTION_NS), + "Returns a dayTimeDuration representing the given number of seconds.", + new SequenceType[]{ + new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The number of seconds") + }, + new FunctionReturnSequenceType(Type.DAY_TIME_DURATION, Cardinality.ZERO_OR_ONE, "The duration")); + + public FnSeconds(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final double seconds = ((DoubleValue) args[0].itemAt(0).convertTo(Type.DOUBLE)).getDouble(); + + if (Double.isNaN(seconds) || Double.isInfinite(seconds)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Cannot create duration from " + (Double.isNaN(seconds) ? "NaN" : "Infinity")); + } + + // Convert seconds to dayTimeDuration (constructor takes milliseconds) + final long millis = Math.round(seconds * 1000.0); + return new DayTimeDurationValue(this, millis); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSiblings.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSiblings.java new file mode 100644 index 00000000000..7fa291387ee --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSiblings.java @@ -0,0 +1,111 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Implements XQuery 4.0 fn:siblings. + * + * Returns the node together with its siblings in document order. + * If the node has no parent (or is an attribute/namespace), returns just the node itself. + */ +public class FnSiblings extends BasicFunction { + + public static final FunctionSignature[] FN_SIBLINGS = { + new FunctionSignature( + new QName("siblings", Function.BUILTIN_FUNCTION_NS), + "Returns the supplied node together with its siblings in document order.", + new SequenceType[] { + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.ZERO_OR_ONE, "The node whose siblings to return") + }, + new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the node and its siblings in document order")), + new FunctionSignature( + new QName("siblings", Function.BUILTIN_FUNCTION_NS), + "Returns the context node together with its siblings in document order.", + new SequenceType[0], + new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the context node and its siblings in document order")) + }; + + public FnSiblings(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input; + if (args.length == 0) { + // 0-arity: use context item + if (contextSequence == null || contextSequence.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPDY0002, + "fn:siblings() called with no context item"); + } + input = contextSequence; + } else { + input = args[0]; + } + + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final Item nodeItem = input.itemAt(0); + final int nodeType = nodeItem.getType(); + + // Attribute and namespace nodes: return just the node itself + if (nodeType == Type.ATTRIBUTE || nodeType == Type.NAMESPACE) { + return nodeItem.toSequence(); + } + + final Node node = (Node) nodeItem; + final Node parent = node.getParentNode(); + + // No parent: return just the node + if (parent == null) { + return nodeItem.toSequence(); + } + + // Return all children of the parent (which includes all siblings + the node itself) + final NodeList children = parent.getChildNodes(); + final ValueSequence result = new ValueSequence(children.getLength()); + for (int i = 0; i < children.getLength(); i++) { + result.add((Item) children.item(i)); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSlice.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSlice.java new file mode 100644 index 00000000000..40d27dac51d --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSlice.java @@ -0,0 +1,149 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:slice (XQuery 4.0). + * + * Returns selected items from a sequence based on position, with support for + * negative indexing and step values (Python-style slicing with 1-based indexing). + */ +public class FnSlice extends BasicFunction { + + private static final String DESCRIPTION = "Returns selected items from the input sequence based on their position."; + + public static final FunctionSignature[] FN_SLICE = { + new FunctionSignature( + new QName("slice", Function.BUILTIN_FUNCTION_NS), + DESCRIPTION, + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the selected items")), + new FunctionSignature( + new QName("slice", Function.BUILTIN_FUNCTION_NS), + DESCRIPTION, + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the selected items")), + new FunctionSignature( + new QName("slice", Function.BUILTIN_FUNCTION_NS), + DESCRIPTION, + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position"), + new FunctionParameterSequenceType("end", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The end position") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the selected items")), + new FunctionSignature( + new QName("slice", Function.BUILTIN_FUNCTION_NS), + DESCRIPTION, + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position"), + new FunctionParameterSequenceType("end", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The end position"), + new FunctionParameterSequenceType("step", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The step value") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the selected items")) + }; + + public FnSlice(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final int count = input.getItemCount(); + + // Resolve start + int s; + if (args.length < 2 || args[1].isEmpty() || ((IntegerValue) args[1].itemAt(0)).getLong() == 0) { + s = 1; + } else { + final long sv = ((IntegerValue) args[1].itemAt(0)).getLong(); + s = (int) (sv < 0 ? count + sv + 1 : sv); + } + + // Resolve end + int e; + if (args.length < 3 || args[2].isEmpty() || ((IntegerValue) args[2].itemAt(0)).getLong() == 0) { + e = count; + } else { + final long ev = ((IntegerValue) args[2].itemAt(0)).getLong(); + e = (int) (ev < 0 ? count + ev + 1 : ev); + } + + // Resolve step + int step; + if (args.length < 4 || args[3].isEmpty() || ((IntegerValue) args[3].itemAt(0)).getLong() == 0) { + step = (e >= s) ? 1 : -1; + } else { + step = (int) ((IntegerValue) args[3].itemAt(0)).getLong(); + } + + // Handle negative step: reverse input and recurse with negated positions + if (step < 0) { + final ValueSequence reversed = new ValueSequence(count); + for (int i = count - 1; i >= 0; i--) { + reversed.add(input.itemAt(i)); + } + // slice(reverse($input), -$s, -$e, -$step) + final Sequence[] newArgs = new Sequence[4]; + newArgs[0] = reversed; + newArgs[1] = new IntegerValue(this, -s); + newArgs[2] = new IntegerValue(this, -e); + newArgs[3] = new IntegerValue(this, -step); + return eval(newArgs, contextSequence); + } + + // Positive step: select items where position >= S, position <= E, and (position - S) mod step == 0 + final ValueSequence result = new ValueSequence(); + for (int pos = s; pos <= e && pos <= count; pos += step) { + if (pos >= 1) { + result.add(input.itemAt(pos - 1)); + } + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSortBy.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSortBy.java new file mode 100644 index 00000000000..5c45a9b6879 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSortBy.java @@ -0,0 +1,267 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements XQuery 4.0 fn:sort-by. + * + * fn:sort-by($input, $keys) sorts a sequence based on sort key specifications + * provided as records (maps) with optional key, collation, and order fields. + */ +public class FnSortBy extends BasicFunction { + + public static final FunctionSignature FN_SORT_BY = new FunctionSignature( + new QName("sort-by", Function.BUILTIN_FUNCTION_NS), + "Sorts a sequence based on sort key specifications.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to sort"), + new FunctionParameterSequenceType("keys", Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, "Sort key records with optional key, collation, and order fields") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the sorted sequence")); + + private AnalyzeContextInfo cachedContextInfo; + + public FnSortBy(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.getItemCount() <= 1) { + return input; + } + + final Sequence keys = args[1]; + + // Parse sort key specifications + final List sortKeys = new ArrayList<>(); + if (keys.isEmpty()) { + // Default: sort by fn:data#1 ascending + final SortKey defaultKey = new SortKey(); + defaultKey.collator = context.getDefaultCollator(); + sortKeys.add(defaultKey); + } else { + for (final SequenceIterator ki = keys.iterate(); ki.hasNext(); ) { + final AbstractMapType keyMap = (AbstractMapType) ki.nextItem(); + sortKeys.add(parseSortKey(keyMap)); + } + } + + // Collect items + final List items = new ArrayList<>(input.getItemCount()); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + items.add(i.nextItem()); + } + + // Pre-compute sort keys for each item + final Sequence[][] keyValues = new Sequence[items.size()][sortKeys.size()]; + for (int idx = 0; idx < items.size(); idx++) { + for (int k = 0; k < sortKeys.size(); k++) { + final SortKey sk = sortKeys.get(k); + if (sk.keyFunction != null) { + keyValues[idx][k] = sk.keyFunction.evalFunction(null, null, + new Sequence[]{items.get(idx).toSequence()}); + } else { + final Item item = items.get(idx); + if (item instanceof ArrayType) { + // Arrays use composite sort key: flatten members to atomic values + final ArrayType arr = (ArrayType) item; + final ValueSequence atomized = new ValueSequence(arr.getSize()); + for (int m = 0; m < arr.getSize(); m++) { + final Sequence member = arr.get(m); + for (final SequenceIterator mi = member.iterate(); mi.hasNext(); ) { + atomized.add(mi.nextItem().atomize()); + } + } + keyValues[idx][k] = atomized; + } else { + keyValues[idx][k] = item.atomize().toSequence(); + } + } + } + } + + // Build index array for stable sort + final Integer[] indices = new Integer[items.size()]; + for (int i = 0; i < indices.length; i++) { + indices[i] = i; + } + + try { + java.util.Arrays.sort(indices, (a, b) -> { + try { + for (int k = 0; k < sortKeys.size(); k++) { + final SortKey sk = sortKeys.get(k); + final Sequence va = keyValues[a][k]; + final Sequence vb = keyValues[b][k]; + final int cmp = compareKeys(va, vb, sk.collator); + if (cmp != 0) { + return sk.descending ? -cmp : cmp; + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + final ValueSequence result = new ValueSequence(items.size()); + for (final int idx : indices) { + result.add(items.get(idx)); + } + return result; + } + + private int compareKeys(final Sequence a, final Sequence b, final Collator collator) throws XPathException { + final boolean emptyA = a.isEmpty(); + final boolean emptyB = b.isEmpty(); + if (emptyA && emptyB) { + return 0; + } + if (emptyA) { + return -1; // empty precedes non-empty + } + if (emptyB) { + return 1; + } + // Lexicographic comparison for composite sort keys + final int len = Math.min(a.getItemCount(), b.getItemCount()); + for (int i = 0; i < len; i++) { + final AtomicValue va = a.itemAt(i).atomize(); + final AtomicValue vb = b.itemAt(i).atomize(); + // Type check: sort keys must be mutually comparable + checkComparable(va, vb); + final int cmp = FunCompare.compare(va, vb, collator); + if (cmp != 0) { + return cmp; + } + } + // Shorter sequence is less + return Integer.compare(a.getItemCount(), b.getItemCount()); + } + + /** + * Check that two sort key values are of mutually comparable types. + * Per XQ4 spec, it is XPTY0004 if they are not. + */ + private void checkComparable(final AtomicValue va, final AtomicValue vb) throws XPathException { + final int t1 = va.getType(); + final int t2 = vb.getType(); + // Same base type family is always comparable + if (t1 == t2) { + return; + } + // String-like types are mutually comparable + if (isStringLike(t1) && isStringLike(t2)) { + return; + } + // Numeric types are mutually comparable + if (va instanceof org.exist.xquery.value.NumericValue && vb instanceof org.exist.xquery.value.NumericValue) { + return; + } + // Same base type hierarchy is comparable + if (Type.subTypeOf(t1, t2) || Type.subTypeOf(t2, t1)) { + return; + } + throw new XPathException(this, ErrorCodes.XPTY0004, + "Sort key values are not mutually comparable: " + Type.getTypeName(t1) + " and " + Type.getTypeName(t2)); + } + + private static boolean isStringLike(final int type) { + return Type.subTypeOf(type, Type.STRING) + || type == Type.UNTYPED_ATOMIC + || Type.subTypeOf(type, Type.ANY_URI); + } + + private SortKey parseSortKey(final AbstractMapType map) throws XPathException { + final SortKey sk = new SortKey(); + + // key field: function to extract sort key + final Sequence keySeq = map.get(new org.exist.xquery.value.StringValue(this, "key")); + if (keySeq != null && !keySeq.isEmpty()) { + sk.keyFunction = (FunctionReference) keySeq.itemAt(0); + sk.keyFunction.analyze(cachedContextInfo); + } + + // collation field + final Sequence collSeq = map.get(new org.exist.xquery.value.StringValue(this, "collation")); + if (collSeq != null && !collSeq.isEmpty()) { + sk.collator = context.getCollator(collSeq.getStringValue(), ErrorCodes.FOCH0002); + } else { + sk.collator = context.getDefaultCollator(); + } + + // order field: "ascending" (default) or "descending" + final Sequence orderSeq = map.get(new org.exist.xquery.value.StringValue(this, "order")); + if (orderSeq != null && !orderSeq.isEmpty()) { + sk.descending = "descending".equals(orderSeq.getStringValue()); + } + + return sk; + } + + private static class SortKey { + FunctionReference keyFunction; + Collator collator; + boolean descending; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSubsequenceMatching.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSubsequenceMatching.java new file mode 100644 index 00000000000..2ffe00b4a61 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSubsequenceMatching.java @@ -0,0 +1,208 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements XQuery 4.0 fn:contains-subsequence, fn:starts-with-subsequence, + * fn:ends-with-subsequence. + */ +public class FnSubsequenceMatching extends BasicFunction { + + public static final FunctionSignature[] FN_CONTAINS_SUBSEQUENCE = { + new FunctionSignature( + new QName("contains-subsequence", Function.BUILTIN_FUNCTION_NS), + "Returns true if the input sequence contains a contiguous subsequence matching the supplied subsequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("subsequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The subsequence to find") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if input contains the subsequence")), + new FunctionSignature( + new QName("contains-subsequence", Function.BUILTIN_FUNCTION_NS), + "Returns true if the input sequence contains a contiguous subsequence matching the supplied subsequence, using a custom comparison function.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("subsequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The subsequence to find"), + new FunctionParameterSequenceType("compare", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The comparison function") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if input contains the subsequence")) + }; + + public static final FunctionSignature[] FN_STARTS_WITH_SUBSEQUENCE = { + new FunctionSignature( + new QName("starts-with-subsequence", Function.BUILTIN_FUNCTION_NS), + "Returns true if the input sequence starts with the supplied subsequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("subsequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The subsequence to match at start") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if input starts with the subsequence")), + new FunctionSignature( + new QName("starts-with-subsequence", Function.BUILTIN_FUNCTION_NS), + "Returns true if the input sequence starts with the supplied subsequence, using a custom comparison function.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("subsequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The subsequence to match at start"), + new FunctionParameterSequenceType("compare", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The comparison function") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if input starts with the subsequence")) + }; + + public static final FunctionSignature[] FN_ENDS_WITH_SUBSEQUENCE = { + new FunctionSignature( + new QName("ends-with-subsequence", Function.BUILTIN_FUNCTION_NS), + "Returns true if the input sequence ends with the supplied subsequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("subsequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The subsequence to match at end") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if input ends with the subsequence")), + new FunctionSignature( + new QName("ends-with-subsequence", Function.BUILTIN_FUNCTION_NS), + "Returns true if the input sequence ends with the supplied subsequence, using a custom comparison function.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("subsequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The subsequence to match at end"), + new FunctionParameterSequenceType("compare", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The comparison function") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if input ends with the subsequence")) + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnSubsequenceMatching(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final Sequence subsequence = args[1]; + + // Empty subsequence always matches + if (subsequence.isEmpty()) { + return BooleanValue.TRUE; + } + + final int inputLen = input.getItemCount(); + final int subLen = subsequence.getItemCount(); + + // Input shorter than subsequence: can't match + if (inputLen < subLen) { + return BooleanValue.FALSE; + } + + // Get optional compare function + FunctionReference compareRef = null; + if (args.length > 2 && !args[2].isEmpty()) { + compareRef = (FunctionReference) args[2].itemAt(0); + compareRef.analyze(cachedContextInfo); + // Validate arity: comparison function must accept exactly 2 arguments + final int arity = compareRef.getSignature().getArgumentCount(); + if (arity != 2) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Comparison function must accept exactly 2 arguments, but has arity " + arity); + } + } + + try { + // Materialize subsequence for random access + final List subItems = new ArrayList<>(subLen); + for (int i = 0; i < subLen; i++) { + subItems.add(subsequence.itemAt(i)); + } + + if (isCalledAs("starts-with-subsequence")) { + return BooleanValue.valueOf(matchesAt(input, subItems, 0, compareRef)); + } else if (isCalledAs("ends-with-subsequence")) { + return BooleanValue.valueOf(matchesAt(input, subItems, inputLen - subLen, compareRef)); + } else { + // contains-subsequence: try all starting positions + for (int start = 0; start <= inputLen - subLen; start++) { + if (matchesAt(input, subItems, start, compareRef)) { + return BooleanValue.TRUE; + } + } + return BooleanValue.FALSE; + } + } finally { + if (compareRef != null) { + compareRef.close(); + } + } + } + + private boolean matchesAt(final Sequence input, final List subItems, final int start, + final FunctionReference compareRef) throws XPathException { + for (int i = 0; i < subItems.size(); i++) { + final Item inputItem = input.itemAt(start + i); + final Item subItem = subItems.get(i); + if (compareRef != null) { + final Sequence result = compareRef.evalFunction(null, null, + new Sequence[]{inputItem.toSequence(), subItem.toSequence()}); + // XQ4: comparison function must return xs:boolean + if (!result.isEmpty() && result.itemAt(0).getType() != Type.BOOLEAN) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Comparison function must return xs:boolean, but returned " + + Type.getTypeName(result.itemAt(0).getType())); + } + if (result.isEmpty() || !result.effectiveBooleanValue()) { + return false; + } + } else { + // Default: deep-equal semantics + if (!FunDeepEqual.deepEquals(inputItem, subItem, context.getDefaultCollator())) { + return false; + } + } + } + return true; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSubsequenceWhere.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSubsequenceWhere.java new file mode 100644 index 00000000000..03a3c20c025 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnSubsequenceWhere.java @@ -0,0 +1,181 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +/** + * Implements XQuery 4.0 fn:subsequence-where. + * + * Returns items from $input starting from the first item where $from returns true, + * up to and including the first subsequent item where $to returns true. + * + * Also supports fn:subsequence-before and fn:subsequence-after as derived + * convenience functions. + */ +public class FnSubsequenceWhere extends BasicFunction { + + public static final FunctionSignature[] FN_SUBSEQUENCE_WHERE = { + new FunctionSignature( + new QName("subsequence-where", Function.BUILTIN_FUNCTION_NS), + "Returns a contiguous subsequence defined by from/to predicates.", + new SequenceType[]{ + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("from", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Predicate for start position"), + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "The selected subsequence")), + new FunctionSignature( + new QName("subsequence-where", Function.BUILTIN_FUNCTION_NS), + "Returns a contiguous subsequence defined by from/to predicates.", + new SequenceType[]{ + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("from", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Predicate for start position"), + new FunctionParameterSequenceType("to", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Predicate for end position"), + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "The selected subsequence")), + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnSubsequenceWhere(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // $from predicate (default: match first item) + FunctionReference fromRef = null; + if (args.length >= 2 && !args[1].isEmpty()) { + fromRef = (FunctionReference) args[1].itemAt(0); + fromRef.analyze(cachedContextInfo); + } + + // $to predicate (default: no end match, include all remaining) + FunctionReference toRef = null; + if (args.length >= 3 && !args[2].isEmpty()) { + toRef = (FunctionReference) args[2].itemAt(0); + toRef.analyze(cachedContextInfo); + } + + try { + final int len = input.getItemCount(); + + // Find start index: first item where $from returns true + int startIdx = -1; + if (fromRef == null) { + startIdx = 0; // default: start from first item + } else { + for (int i = 0; i < len; i++) { + if (callPredicate(fromRef, input.itemAt(i), i + 1)) { + startIdx = i; + break; + } + } + } + + if (startIdx < 0) { + return Sequence.EMPTY_SEQUENCE; + } + + // Find end index: first item at or after start where $to returns true + int endIdx = len - 1; // default: include all to end + if (toRef != null) { + endIdx = -1; + for (int i = startIdx; i < len; i++) { + if (callPredicate(toRef, input.itemAt(i), i + 1)) { + endIdx = i; + break; + } + } + if (endIdx < 0) { + // No match for $to — per spec, include all remaining + endIdx = len - 1; + } + } + + // Build result + final ValueSequence result = new ValueSequence(endIdx - startIdx + 1); + for (int i = startIdx; i <= endIdx; i++) { + result.add(input.itemAt(i)); + } + return result; + + } finally { + if (fromRef != null) { + fromRef.close(); + } + if (toRef != null) { + toRef.close(); + } + } + } + + private boolean callPredicate(final FunctionReference ref, final Item item, final int position) throws XPathException { + final int arity = ref.getSignature().getArgumentCount(); + final Sequence result; + if (arity == 0) { + result = ref.evalFunction(null, null, new Sequence[]{}); + } else if (arity == 1) { + result = ref.evalFunction(null, null, new Sequence[]{item.toSequence()}); + } else { + result = ref.evalFunction(null, null, new Sequence[]{item.toSequence(), new IntegerValue(this, position)}); + } + + if (result.isEmpty()) { + return false; + } + + // Must be xs:boolean — EBV of other types is not allowed + final Item resultItem = result.itemAt(0); + if (resultItem.getType() == Type.BOOLEAN) { + return ((BooleanValue) resultItem).getValue(); + } + + // Check if it's a map (maps can be used as predicates) + if (resultItem instanceof org.exist.xquery.functions.map.AbstractMapType) { + // Map used as predicate: look up the item in the map + final org.exist.xquery.functions.map.AbstractMapType map = + (org.exist.xquery.functions.map.AbstractMapType) resultItem; + final Sequence mapResult = map.get((AtomicValue) item.atomize()); + if (mapResult == null || mapResult.isEmpty()) { + return false; + } + return mapResult.effectiveBooleanValue(); + } + + throw new XPathException(this, ErrorCodes.XPTY0004, + "Predicate in subsequence-where must return xs:boolean, got " + Type.getTypeName(resultItem.getType())); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTransitiveClosure.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTransitiveClosure.java new file mode 100644 index 00000000000..1970470e7c1 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTransitiveClosure.java @@ -0,0 +1,143 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.util.LinkedHashSet; +import java.util.Set; + +/** + * Implements XQuery 4.0 fn:transitive-closure. + * + * Applies a step function repeatedly starting from an initial set of items, + * accumulating results until no new items are produced. Handles cycles + * through deduplication. + */ +public class FnTransitiveClosure extends BasicFunction { + + private AnalyzeContextInfo cachedContextInfo; + + public static final FunctionSignature FN_TRANSITIVE_CLOSURE = new FunctionSignature( + new QName("transitive-closure", Function.BUILTIN_FUNCTION_NS), + "Returns the transitive closure of applying $step to $input. " + + "The step function is applied repeatedly until no new items are produced.", + new SequenceType[]{ + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial input sequence"), + new FunctionParameterSequenceType("step", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The step function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "The transitive closure")); + + public FnTransitiveClosure(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final FunctionReference stepRef = (FunctionReference) args[1].itemAt(0); + stepRef.analyze(cachedContextInfo); + + try { + // Apply step to initial input to get first results + Sequence current = applyStep(stepRef, input); + if (current.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Track all seen items using identity (for nodes) or value (for atomics) + final ValueSequence allResults = new ValueSequence(); + final Set seen = new LinkedHashSet<>(); + + // Add initial step results + addNewItems(current, allResults, seen); + + // Iterate until no new items are found + while (true) { + final Sequence nextStep = applyStep(stepRef, current); + if (nextStep.isEmpty()) { + break; + } + + final ValueSequence newItems = new ValueSequence(); + for (final SequenceIterator i = nextStep.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final Object key = itemKey(item); + if (seen.add(key)) { + newItems.add(item); + allResults.add(item); + } + } + + if (newItems.isEmpty()) { + break; + } + current = newItems; + } + + return allResults; + } finally { + stepRef.close(); + } + } + + private Sequence applyStep(final FunctionReference stepRef, final Sequence input) throws XPathException { + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final Sequence stepResult = stepRef.evalFunction(null, null, new Sequence[]{item.toSequence()}); + result.addAll(stepResult); + } + return result; + } + + private static void addNewItems(final Sequence seq, final ValueSequence target, final Set seen) throws XPathException { + for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final Object key = itemKey(item); + if (seen.add(key)) { + target.add(item); + } + } + } + + private static Object itemKey(final Item item) { + if (item instanceof NodeValue) { + // Use the node itself for identity-based deduplication + return ((NodeValue) item).getNode(); + } + // For atomic values, use the value itself + return item; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTypeAnnotation.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTypeAnnotation.java new file mode 100644 index 00000000000..579ec116888 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTypeAnnotation.java @@ -0,0 +1,424 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AbstractExpression; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionCall; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.QNameValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import org.exist.xquery.functions.map.MapType; + +import javax.xml.XMLConstants; +import java.util.ArrayList; +import java.util.List; + +/** + * Implements XQuery 4.0 fn:atomic-type-annotation and fn:node-type-annotation. + * + * Returns a schema-type-record (map) with function-valued entries: base-type, primitive-type, + * matches, constructor. The entire ancestor chain is pre-computed to avoid issues with + * nested function evaluation contexts. + */ +public class FnTypeAnnotation extends BasicFunction { + + private static final String XS_NS = "http://www.w3.org/2001/XMLSchema"; + + public static final FunctionSignature FN_ATOMIC_TYPE_ANNOTATION = new FunctionSignature( + new QName("atomic-type-annotation", Function.BUILTIN_FUNCTION_NS), + "Returns a record describing the type annotation of an atomic value.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The atomic value to inspect") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "schema-type-record")); + + public static final FunctionSignature FN_NODE_TYPE_ANNOTATION = new FunctionSignature( + new QName("node-type-annotation", Function.BUILTIN_FUNCTION_NS), + "Returns a record describing the type annotation of an element or attribute node.", + new SequenceType[] { + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.EXACTLY_ONE, "The element or attribute node to inspect") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "schema-type-record")); + + public FnTypeAnnotation(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Item item = args[0].itemAt(0); + + final int type; + final boolean isSimple; + if (isCalledAs("atomic-type-annotation")) { + type = item.getType(); + isSimple = true; + } else { + // Non-schema-aware: elements → xs:untyped, attributes → xs:untypedAtomic + if (item.getType() == Type.ATTRIBUTE) { + type = Type.UNTYPED_ATOMIC; + isSimple = true; + } else { + type = Type.UNTYPED; + isSimple = false; + } + } + + // Pre-compute the full ancestor chain bottom-up + return buildRecordChain(type, isSimple); + } + + /** + * Build the complete record chain from the given type up to xs:anyType. + * Each record's base-type function returns the pre-built parent record. + * + * Uses the XML Schema type hierarchy (not the XPath item() hierarchy) + * since type annotations follow the schema derivation chain. + */ + private MapType buildRecordChain(final int type, final boolean isSimple) throws XPathException { + // Collect ancestor chain using XML Schema hierarchy + final List chain = new ArrayList<>(); + chain.add(new int[]{type, isSimple ? 1 : 0}); + + int current = type; + while (current != Type.ANY_TYPE) { + final int parent = schemaBaseType(current); + if (parent == current) { + break; + } + final boolean parentSimple = isSchemaSimpleType(parent); + chain.add(new int[]{parent, parentSimple ? 1 : 0}); + current = parent; + } + + // Build records top-down (from anyType to target type) + // so each record can reference its pre-built parent record + MapType parentRecord = null; + for (int i = chain.size() - 1; i >= 0; i--) { + final int t = chain.get(i)[0]; + final boolean simple = chain.get(i)[1] == 1; + parentRecord = buildSingleRecord(t, simple, parentRecord, type); + } + + return parentRecord; + } + + /** + * Get the XML Schema base type for a given type. + * Unlike Type.getSuperType which follows the XPath item() hierarchy, + * this follows the XML Schema derivation chain: + * anyAtomicType → anySimpleType → anyType (not item()). + */ + private static int schemaBaseType(final int type) { + switch (type) { + case Type.ANY_TYPE: + return Type.ANY_TYPE; // root + case Type.UNTYPED: + return Type.ANY_TYPE; + case Type.ANY_SIMPLE_TYPE: + return Type.ANY_TYPE; + case Type.ANY_ATOMIC_TYPE: + return Type.ANY_SIMPLE_TYPE; // key fix: not item() + case Type.UNTYPED_ATOMIC: + return Type.ANY_ATOMIC_TYPE; + default: + // For other types, use the normal hierarchy but redirect through schema chain + final int parent = Type.getSuperType(type); + // If parent is ITEM, redirect to ANY_ATOMIC_TYPE (for atomic types) or ANY_TYPE + if (parent == Type.ITEM) { + return Type.ANY_ATOMIC_TYPE; + } + return parent; + } + } + + /** + * Determine if a type is "simple" in the XML Schema sense. + */ + private static boolean isSchemaSimpleType(final int type) { + if (type == Type.ANY_TYPE || type == Type.UNTYPED) { + return false; + } + if (type == Type.ANY_SIMPLE_TYPE || type == Type.ANY_ATOMIC_TYPE || type == Type.UNTYPED_ATOMIC) { + return true; + } + // Walk up to see if we eventually reach ANY_ATOMIC_TYPE or ANY_SIMPLE_TYPE + int current = type; + for (int i = 0; i < 20; i++) { // safety limit + final int parent = Type.getSuperType(current); + if (parent == current) break; + if (parent == Type.ANY_ATOMIC_TYPE || parent == Type.ANY_SIMPLE_TYPE) { + return true; + } + if (parent == Type.ITEM || parent == Type.ANY_TYPE) { + return false; + } + current = parent; + } + return false; + } + + /** + * Build a single schema-type-record for a given type, + * with base-type returning the pre-built parent record. + * + * @param type the eXist type constant + * @param isSimple whether this is a simple type + * @param parentRecord pre-built parent record (or null for root) + * @param leafType the original leaf type (for primitive-type calculation) + */ + private MapType buildSingleRecord(final int type, final boolean isSimple, + final MapType parentRecord, final int leafType) throws XPathException { + final MapType result = new MapType(this, context); + + // name: xs:QName + final QName typeName = typeToQName(type); + if (typeName != null) { + result.add(new StringValue(this, "name"), + new QNameValue(this, context, typeName)); + } + + // is-simple: xs:boolean + result.add(new StringValue(this, "is-simple"), BooleanValue.valueOf(isSimple)); + + // variety: xs:string + if (isSimple) { + result.add(new StringValue(this, "variety"), new StringValue(this, "atomic")); + } else { + result.add(new StringValue(this, "variety"), new StringValue(this, "mixed")); + } + + // base-type: function() as schema-type-record? + // Returns the pre-built parent record, or empty sequence for root types + final Sequence baseTypeResult = parentRecord != null ? parentRecord : Sequence.EMPTY_SEQUENCE; + result.add(new StringValue(this, "base-type"), makeConstantFunction("base-type-" + type, 0, baseTypeResult)); + + // For simple types: add primitive-type, matches, constructor + if (isSimple) { + // primitive-type: find the primitive ancestor type and build its record + final int primitiveType = findPrimitiveType(type); + if (primitiveType != type) { + // Build a standalone record for the primitive type + result.add(new StringValue(this, "primitive-type"), + makeConstantFunction("primitive-type-" + type, 0, buildPrimitiveRecord(primitiveType))); + } else { + // This IS the primitive type — return self + // Use a lazy self-reference via deferred evaluation + result.add(new StringValue(this, "primitive-type"), + makeConstantFunction("primitive-type-" + type, 0, result)); + } + + // matches: function($value) as xs:boolean + result.add(new StringValue(this, "matches"), makeMatchesFunction(type)); + + // constructor: function($value) as xs:atomic + result.add(new StringValue(this, "constructor"), makeConstructorFunction(type)); + } + + return result; + } + + /** + * Build a minimal schema-type-record for the primitive type. + */ + private MapType buildPrimitiveRecord(final int type) throws XPathException { + // Primitive types have base-type = anyAtomicType + // Build a simple chain: primitiveType → anyAtomicType → anySimpleType → anyType + return buildRecordChain(type, true); + } + + /** + * Create a zero-arg function that returns a constant sequence. + */ + private FunctionReference makeConstantFunction(final String name, final int arity, final Sequence value) throws XPathException { + final QName fnName = new QName(name, XMLConstants.NULL_NS_URI); + final FunctionSignature sig = new FunctionSignature(fnName, new SequenceType[0], + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "result")); + final UserDefinedFunction func = new UserDefinedFunction(context, sig); + func.setFunctionBody(new ConstantExpression(context, value)); + final FunctionCall call = new FunctionCall(context, func); + call.setLocation(getLine(), getColumn()); + return new FunctionReference(this, call); + } + + private FunctionReference makeMatchesFunction(final int type) throws XPathException { + final QName fnName = new QName("matches-" + type, XMLConstants.NULL_NS_URI); + final QName paramName = new QName("value", XMLConstants.NULL_NS_URI); + final FunctionSignature sig = new FunctionSignature(fnName, + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.EXACTLY_ONE, "value to test") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if value matches")); + final UserDefinedFunction func = new UserDefinedFunction(context, sig); + func.addVariable(paramName); + func.setFunctionBody(new MatchesExpression(context, type, paramName)); + final FunctionCall call = new FunctionCall(context, func); + call.setLocation(getLine(), getColumn()); + return new FunctionReference(this, call); + } + + private FunctionReference makeConstructorFunction(final int type) throws XPathException { + final QName fnName = new QName("constructor-" + type, XMLConstants.NULL_NS_URI); + final QName paramName = new QName("value", XMLConstants.NULL_NS_URI); + final FunctionSignature sig = new FunctionSignature(fnName, + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.EXACTLY_ONE, "value to cast") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.EXACTLY_ONE, "cast value")); + final UserDefinedFunction func = new UserDefinedFunction(context, sig); + func.addVariable(paramName); + func.setFunctionBody(new ConstructorExpression(context, type, paramName)); + final FunctionCall call = new FunctionCall(context, func); + call.setLocation(getLine(), getColumn()); + return new FunctionReference(this, call); + } + + private static QName typeToQName(final int type) { + final String name = Type.getTypeName(type); + if (name == null) { + return null; + } + final String local; + if (name.startsWith("xs:")) { + local = name.substring(3); + } else { + local = name; + } + return new QName(local, XS_NS, "xs"); + } + + private static int findPrimitiveType(final int type) { + if (type == Type.ANY_ATOMIC_TYPE || type == Type.ANY_SIMPLE_TYPE || type == Type.ANY_TYPE) { + return type; + } + int current = type; + while (true) { + final int parent = Type.getSuperType(current); + if (parent == Type.ANY_ATOMIC_TYPE || parent == current) { + return current; + } + current = parent; + } + } + + + // === Inner expression classes === + + private static class ConstantExpression extends AbstractExpression { + private final Sequence value; + + ConstantExpression(final XQueryContext context, final Sequence value) { + super(context); + this.value = value; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) { + return value; + } + + @Override + public int returnsType() { return Type.ITEM; } + @Override + public void analyze(final org.exist.xquery.AnalyzeContextInfo contextInfo) {} + @Override + public void dump(final org.exist.xquery.util.ExpressionDumper dumper) { dumper.display("constant"); } + @Override + public String toString() { return "constant"; } + } + + private static class MatchesExpression extends AbstractExpression { + private final int targetType; + private final QName paramName; + + MatchesExpression(final XQueryContext context, final int targetType, final QName paramName) { + super(context); + this.targetType = targetType; + this.paramName = paramName; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence value = context.resolveVariable(paramName).getValue(); + if (value.isEmpty()) { + return BooleanValue.FALSE; + } + final Item item = value.itemAt(0); + return BooleanValue.valueOf(Type.subTypeOf(item.getType(), targetType)); + } + + @Override + public int returnsType() { return Type.BOOLEAN; } + @Override + public void analyze(final org.exist.xquery.AnalyzeContextInfo contextInfo) {} + @Override + public void dump(final org.exist.xquery.util.ExpressionDumper dumper) { dumper.display("matches()"); } + @Override + public String toString() { return "matches()"; } + } + + private static class ConstructorExpression extends AbstractExpression { + private final int targetType; + private final QName paramName; + + ConstructorExpression(final XQueryContext context, final int targetType, final QName paramName) { + super(context); + this.targetType = targetType; + this.paramName = paramName; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence value = context.resolveVariable(paramName).getValue(); + if (value.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final Item item = value.itemAt(0); + return item.convertTo(targetType); + } + + @Override + public int returnsType() { return targetType; } + @Override + public void analyze(final org.exist.xquery.AnalyzeContextInfo contextInfo) {} + @Override + public void dump(final org.exist.xquery.util.ExpressionDumper dumper) { dumper.display("constructor()"); } + @Override + public String toString() { return "constructor()"; } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTypeOf.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTypeOf.java new file mode 100644 index 00000000000..0420378474a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnTypeOf.java @@ -0,0 +1,146 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.util.LinkedHashSet; +import java.util.Set; + +/** + * Implements XQuery 4.0 fn:type-of. + * + * Returns a string representation of the type of the supplied value, + * matching the SequenceType grammar. + */ +public class FnTypeOf extends BasicFunction { + + public static final FunctionSignature FN_TYPE_OF = new FunctionSignature( + new QName("type-of", Function.BUILTIN_FUNCTION_NS), + "Returns a string describing the type of the supplied value.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_MORE, "The value to inspect") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "string matching SequenceType grammar")); + + public FnTypeOf(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence value = args[0]; + + if (value.isEmpty()) { + return new StringValue(this, "empty-sequence()"); + } + + // Collect distinct type strings, preserving order + final Set typeStrings = new LinkedHashSet<>(); + for (final SequenceIterator i = value.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + typeStrings.add(itemTypeString(item)); + } + + final StringBuilder sb = new StringBuilder(); + if (typeStrings.size() > 1) { + sb.append('('); + } + boolean first = true; + for (final String ts : typeStrings) { + if (!first) { + sb.append('|'); + } + sb.append(ts); + first = false; + } + if (typeStrings.size() > 1) { + sb.append(')'); + } + + // Occurrence indicator + final int count = value.getItemCount(); + if (count > 1) { + sb.append('+'); + } + + return new StringValue(this, sb.toString()); + } + + private String itemTypeString(final Item item) { + final int type = item.getType(); + + // Node types + switch (type) { + case Type.DOCUMENT: + return "document-node()"; + case Type.ELEMENT: + return "element()"; + case Type.ATTRIBUTE: + return "attribute()"; + case Type.TEXT: + return "text()"; + case Type.PROCESSING_INSTRUCTION: + return "processing-instruction()"; + case Type.COMMENT: + return "comment()"; + case Type.NAMESPACE: + return "namespace-node()"; + } + + // Function types + if (Type.subTypeOf(type, Type.ARRAY_ITEM)) { + return "array(*)"; + } + if (Type.subTypeOf(type, Type.MAP_ITEM)) { + return "map(*)"; + } + if (Type.subTypeOf(type, Type.FUNCTION)) { + return "fn(*)"; + } + + // Atomic types: getTypeName already includes the xs: prefix + if (Type.subTypeOf(type, Type.ANY_ATOMIC_TYPE)) { + final String typeName = Type.getTypeName(type); + if (typeName != null) { + return typeName; + } + return "xs:anyAtomicType"; + } + + return "item()"; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnUnixDateTime.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnUnixDateTime.java new file mode 100644 index 00000000000..11f946e1531 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnUnixDateTime.java @@ -0,0 +1,86 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.DateTimeValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; + +/** + * Implements XQuery 4.0 fn:unix-dateTime. + * + * fn:unix-dateTime($value as xs:nonNegativeInteger?) as xs:dateTimeStamp + * Converts milliseconds since Unix epoch to xs:dateTime with UTC timezone. + */ +public class FnUnixDateTime extends BasicFunction { + + public static final FunctionSignature[] FN_UNIX_DATETIME = { + new FunctionSignature( + new QName("unix-dateTime", Function.BUILTIN_FUNCTION_NS), + "Converts Unix time in milliseconds to xs:dateTime in UTC.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.INTEGER, Cardinality.ZERO_OR_ONE, "Unix time in milliseconds since epoch") + }, + new FunctionReturnSequenceType(Type.DATE_TIME, Cardinality.EXACTLY_ONE, "the corresponding dateTime in UTC")), + new FunctionSignature( + new QName("unix-dateTime", Function.BUILTIN_FUNCTION_NS), + "Returns the Unix epoch (1970-01-01T00:00:00Z).", + new SequenceType[] { + }, + new FunctionReturnSequenceType(Type.DATE_TIME, Cardinality.EXACTLY_ONE, "the Unix epoch")) + }; + + public FnUnixDateTime(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + long millis = 0; + if (args.length > 0 && !args[0].isEmpty()) { + millis = ((IntegerValue) args[0].itemAt(0)).getLong(); + } + + final ZonedDateTime zdt = Instant.ofEpochMilli(millis).atZone(ZoneOffset.UTC); + // Format with explicit seconds and optional milliseconds + final long ms = millis % 1000; + final String pattern = (ms != 0) ? "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'" : "yyyy-MM-dd'T'HH:mm:ss'Z'"; + final String isoStr = DateTimeFormatter.ofPattern(pattern).format(zdt); + return new DateTimeValue(this, isoStr); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnUnparsedBinary.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnUnparsedBinary.java new file mode 100644 index 00000000000..4da5d1b310d --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnUnparsedBinary.java @@ -0,0 +1,114 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.source.FileSource; +import org.exist.source.Source; +import org.exist.source.SourceFactory; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * fn:unparsed-binary($uri as xs:string?) as xs:base64Binary? + * Loads binary content from a URI and returns it as xs:base64Binary. + */ +public class FnUnparsedBinary extends BasicFunction { + + public static final FunctionSignature FN_UNPARSED_BINARY = new FunctionSignature( + new QName("unparsed-binary", Function.BUILTIN_FUNCTION_NS), + "Loads binary content from a URI and returns it as xs:base64Binary.", + new SequenceType[] { + new FunctionParameterSequenceType("uri", Type.STRING, + Cardinality.ZERO_OR_ONE, "The URI of the binary resource") + }, + new FunctionReturnSequenceType(Type.BASE64_BINARY, Cardinality.ZERO_OR_ONE, + "The binary content")); + + public FnUnparsedBinary(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String uriParam = args[0].getStringValue(); + + try { + URI uri = new URI(uriParam); + + // Resolve relative URIs against file: base URI + boolean resolvedFromBaseUri = false; + if (!uri.isAbsolute()) { + final AnyURIValue baseXdmUri = context.getBaseURI(); + if (baseXdmUri != null && !baseXdmUri.equals(AnyURIValue.EMPTY_URI)) { + String baseStr = baseXdmUri.toURI().toString(); + if (baseStr.startsWith("file:")) { + final int lastSlash = baseStr.lastIndexOf('/'); + if (lastSlash >= 0) { + baseStr = baseStr.substring(0, lastSlash + 1); + } + uri = new URI(baseStr).resolve(uri); + resolvedFromBaseUri = true; + } + } + } + + final String resolvedUri = uri.toASCIIString(); + + // Handle file: URIs directly (only for resolved relative paths) + if (resolvedFromBaseUri && resolvedUri.startsWith("file:")) { + final String filePath = resolvedUri.replaceFirst("^file:(?://[^/]*)?", ""); + final java.nio.file.Path path = java.nio.file.Paths.get(filePath); + if (java.nio.file.Files.isReadable(path)) { + try (final InputStream is = java.nio.file.Files.newInputStream(path)) { + return BinaryValueFromInputStream.getInstance(context, + new Base64BinaryValueType(), is, this); + } + } + throw new XPathException(this, ErrorCodes.FOUT1170, + "Could not find binary resource: " + uriParam); + } + + // Use SourceFactory for other URIs + final Source source = SourceFactory.getSource(context.getBroker(), "", resolvedUri, false); + if (source == null) { + throw new XPathException(this, ErrorCodes.FOUT1170, + "Could not find binary resource: " + uriParam); + } + try (final InputStream is = source.getInputStream()) { + return BinaryValueFromInputStream.getInstance(context, + new Base64BinaryValueType(), is, this); + } + } catch (final IOException | URISyntaxException | org.exist.security.PermissionDeniedException e) { + throw new XPathException(this, ErrorCodes.FOUT1170, e.getMessage()); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAbs.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAbs.java index 81d29fb9b34..01a0f91ebab 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAbs.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAbs.java @@ -48,7 +48,7 @@ public class FunAbs extends Function { "Returns the absolute value of the argument $number." + "If the argument is negative returns -$number otherwise returns $number.", new SequenceType[] { - new FunctionParameterSequenceType("number", Type.NUMERIC, + new FunctionParameterSequenceType("value", Type.NUMERIC, Cardinality.ZERO_OR_ONE, "The number") }, new FunctionReturnSequenceType(Type.NUMERIC, Cardinality.ZERO_OR_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAdjustTimezone.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAdjustTimezone.java index 2bc27fa19c5..197714ac276 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAdjustTimezone.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAdjustTimezone.java @@ -44,10 +44,10 @@ */ public class FunAdjustTimezone extends BasicFunction { - public final static FunctionParameterSequenceType DATE_TIME_01_PARAM = new FunctionParameterSequenceType("date-time", Type.DATE_TIME, Cardinality.ZERO_OR_ONE, "The date-time"); - public final static FunctionParameterSequenceType DATE_01_PARAM = new FunctionParameterSequenceType("date", Type.DATE, Cardinality.ZERO_OR_ONE, "The date"); - public final static FunctionParameterSequenceType TIME_01_PARAM = new FunctionParameterSequenceType("time", Type.TIME, Cardinality.ZERO_OR_ONE, "The time"); - public final static FunctionParameterSequenceType DURATION_01_PARAM = new FunctionParameterSequenceType("duration", Type.DAY_TIME_DURATION, Cardinality.ZERO_OR_ONE, "The duration"); + public final static FunctionParameterSequenceType DATE_TIME_01_PARAM = new FunctionParameterSequenceType("value", Type.DATE_TIME, Cardinality.ZERO_OR_ONE, "The date-time"); + public final static FunctionParameterSequenceType DATE_01_PARAM = new FunctionParameterSequenceType("value", Type.DATE, Cardinality.ZERO_OR_ONE, "The date"); + public final static FunctionParameterSequenceType TIME_01_PARAM = new FunctionParameterSequenceType("value", Type.TIME, Cardinality.ZERO_OR_ONE, "The time"); + public final static FunctionParameterSequenceType DURATION_01_PARAM = new FunctionParameterSequenceType("timezone", Type.DAY_TIME_DURATION, Cardinality.ZERO_OR_ONE, "The duration"); public final static FunctionReturnSequenceType DATE_TIME_01_RETURN = new FunctionReturnSequenceType(Type.DATE_TIME, Cardinality.ZERO_OR_ONE, "the adjusted date-time"); public final static FunctionReturnSequenceType DATE_01_RETURN = new FunctionReturnSequenceType(Type.DATE, Cardinality.ZERO_OR_ONE, "the adjusted date"); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java index 8fe035492a7..01c0b7e60c2 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java @@ -64,7 +64,7 @@ public class FunAnalyzeString extends BasicFunction { "matched substrings, which substrings matched each " + "capturing group in the regular expression.", new SequenceType[] { - new FunctionParameterSequenceType("input", Type.STRING, + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"), new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The pattern") @@ -80,7 +80,7 @@ public class FunAnalyzeString extends BasicFunction { "matched substrings, which substrings matched each " + "capturing group in the regular expression.", new SequenceType[] { - new FunctionParameterSequenceType("input", Type.STRING, + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"), new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The pattern"), @@ -158,25 +158,46 @@ private void analyzeString(final MemTreeBuilder builder, final String input, Str private void match(final MemTreeBuilder builder, final RegexIterator regexIterator) throws net.sf.saxon.trans.XPathException { builder.startElement(QN_MATCH, null); - regexIterator.processMatchingSubstring(new RegexIterator.MatchHandler() { - @Override - public void characters(final CharSequence s) { - builder.characters(s); - } - - @Override - public void onGroupStart(final int groupNumber) throws net.sf.saxon.trans.XPathException { - final AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute("", QN_NR.getLocalPart(), QN_NR.getLocalPart(), "int", Integer.toString(groupNumber)); - - builder.startElement(QN_GROUP, attributes); - } - - @Override - public void onGroupEnd(final int groupNumber) throws net.sf.saxon.trans.XPathException { - builder.endElement(); + // Use reflection to avoid compile-time dependency on RegexIterator$MatchHandler, + // which is stripped from the XQTS runner assembly JAR by sbt's merge strategy. + // When running in the normal eXist server (or on the next branch with full Saxon), + // the proxy delegates to Saxon's own group traversal logic. + try { + final Class handlerClass = Class.forName("net.sf.saxon.regex.RegexIterator$MatchHandler"); + final Object handler = java.lang.reflect.Proxy.newProxyInstance( + handlerClass.getClassLoader(), + new Class[]{ handlerClass }, + (proxy, method, args) -> { + switch (method.getName()) { + case "characters": + builder.characters((CharSequence) args[0]); + break; + case "onGroupStart": + final AttributesImpl attrs = new AttributesImpl(); + attrs.addAttribute("", QN_NR.getLocalPart(), QN_NR.getLocalPart(), + "int", Integer.toString((Integer) args[0])); + builder.startElement(QN_GROUP, attrs); + break; + case "onGroupEnd": + builder.endElement(); + break; + } + return null; + }); + final java.lang.reflect.Method processMethod = regexIterator.getClass().getMethod( + "processMatchingSubstring", handlerClass); + processMethod.invoke(regexIterator, handler); + } catch (final ClassNotFoundException e) { + // MatchHandler unavailable — output match text without group decomposition + builder.characters(regexIterator.getRegexGroup(0)); + } catch (final java.lang.reflect.InvocationTargetException e) { + if (e.getCause() instanceof net.sf.saxon.trans.XPathException) { + throw (net.sf.saxon.trans.XPathException) e.getCause(); } - }); + builder.characters(regexIterator.getRegexGroup(0)); + } catch (final Exception e) { + builder.characters(regexIterator.getRegexGroup(0)); + } builder.endElement(); } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBaseURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBaseURI.java index 4bc9c42a071..db36023811d 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBaseURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBaseURI.java @@ -57,7 +57,7 @@ public class FunBaseURI extends BasicFunction { ); private static final FunctionParameterSequenceType FS_PARAM_NODE - = optParam("arg", Type.NODE, "The node."); + = optParam("node", Type.NODE, "The node."); static final FunctionSignature FS_BASE_URI_1 = functionSignature( FS_BASE_URI, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBoolean.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBoolean.java index 42224bbba1f..52a2fceff27 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBoolean.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunBoolean.java @@ -48,7 +48,7 @@ public class FunBoolean extends Function { new QName("boolean", Function.BUILTIN_FUNCTION_NS), "Computes the xs:boolean value of the sequence items.", new SequenceType[] { - new FunctionParameterSequenceType("items", Type.ITEM, + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The items") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCeiling.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCeiling.java index 910ca103a1a..d5ef31f1ef4 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCeiling.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCeiling.java @@ -46,7 +46,7 @@ public class FunCeiling extends Function { "returns the smallest (closest to negative infinity) number " + "with no fractional part that is not less than the value of the argument, $number.", new SequenceType[] { - new FunctionParameterSequenceType("number", Type.NUMERIC, + new FunctionParameterSequenceType("value", Type.NUMERIC, Cardinality.ZERO_OR_ONE, "The number") }, new FunctionReturnSequenceType(Type.NUMERIC, Cardinality.ZERO_OR_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointEqual.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointEqual.java index 3001a9300cc..4ea7d17e7c5 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointEqual.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointEqual.java @@ -57,9 +57,9 @@ public class FunCodepointEqual extends BasicFunction { "is equal to the value of $string-2, according to the Unicode " + "code point collation.", new SequenceType[] { - new FunctionParameterSequenceType("string-1", Type.STRING, + new FunctionParameterSequenceType("value1", Type.STRING, Cardinality.ZERO_OR_ONE, "The first string"), - new FunctionParameterSequenceType("string-2", Type.STRING, + new FunctionParameterSequenceType("value2", Type.STRING, Cardinality.ZERO_OR_ONE, "The second string"), }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.ZERO_OR_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java index 4981728b436..6e613a72970 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java @@ -61,7 +61,7 @@ public class FunCodepointsToString extends BasicFunction { "If any of the code points in $codepoints is not a " + "legal XML character, an error is raised", new SequenceType[] { - new FunctionParameterSequenceType("codepoints", Type.INTEGER, + new FunctionParameterSequenceType("values", Type.INTEGER, Cardinality.ZERO_OR_MORE, "The codepoints as a sequence of xs:integer values"), }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCollationKey.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCollationKey.java index b068ed3c873..8413a9c8f49 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCollationKey.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCollationKey.java @@ -45,8 +45,8 @@ public class FunCollationKey extends BasicFunction { "keys reflects the matching and ordering of strings " + "under the specified collation."; private static final FunctionReturnSequenceType FN_RETURN = returnsOpt(Type.BASE64_BINARY, "the collation key"); - private static final FunctionParameterSequenceType PARAM_VALUE_STRING = param("value-string", Type.STRING, "The value string"); - private static final FunctionParameterSequenceType PARAM_COLLATION_STRING = param("collation-string", Type.STRING, "The collation string"); + private static final FunctionParameterSequenceType PARAM_VALUE_STRING = param("value", Type.STRING, "The value string"); + private static final FunctionParameterSequenceType PARAM_COLLATION_STRING = param("collation", Type.STRING, "The collation string"); public static final FunctionSignature[] FS_COLLATION_KEY_SIGNATURES = functionSignatures( FN_NAME, FN_DESCRIPTION, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCompare.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCompare.java index d2cd6e102c7..2b547f8a8a3 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCompare.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCompare.java @@ -32,14 +32,23 @@ import org.exist.xquery.Profiler; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AbstractDateTimeValue; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.DurationValue; +import org.exist.xquery.value.FloatValue; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.IntegerValue; import org.exist.xquery.value.Item; +import org.exist.xquery.value.NumericValue; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; +import java.math.BigDecimal; +import java.math.BigInteger; + import javax.annotation.Nullable; /** @@ -52,44 +61,33 @@ public class FunCompare extends CollatingFunction { public final static FunctionSignature[] signatures = { new FunctionSignature ( new QName("compare", Function.BUILTIN_FUNCTION_NS), - "Returns the collatable comparison between $string-1 and $string-2, using $collation-uri. " + - "-1 if $string-1 is inferior to $string-2, 0 if $string-1 is equal " + - "to $string-2, 1 if $string-1 is superior to $string-2. " + - "If either comparand is the empty sequence, the empty sequence is " + - "returned. " + - "Please remember to specify the collation in the context or use, " + - "the three argument version if you don't want the system default.", + "Returns -1, 0, or 1, depending on whether $value-1 is less than, equal to, " + + "or greater than $value-2. " + + "If either comparand is the empty sequence, the empty sequence is returned.", new SequenceType[] { - new FunctionParameterSequenceType("string-1", Type.STRING, - Cardinality.ZERO_OR_ONE, "The first string"), - new FunctionParameterSequenceType("string-2", Type.STRING, - Cardinality.ZERO_OR_ONE, "The second string") + new FunctionParameterSequenceType("value-1", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_ONE, "The first value"), + new FunctionParameterSequenceType("value-2", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_ONE, "The second value") }, new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_ONE, - "-1 if $string-1 is inferior to $string-2, " + - "0 if $string-1 is equal to $string-2, " + - "1 if $string-1 is superior to $string-2. " + - "If either comparand is the empty sequence, the empty sequence is returned.")), + "-1, 0, or 1 depending on comparison result")), new FunctionSignature ( new QName("compare", Function.BUILTIN_FUNCTION_NS), - "Returns the collatable comparison between $string-1 and $string-2, using $collation-uri. " + - "-1 if $string-1 is inferior to $string-2, 0 if $string-1 is equal " + - "to $string-2, 1 if $string-1 is superior to $string-2. " + + "Returns -1, 0, or 1, depending on whether $value-1 is less than, equal to, " + + "or greater than $value-2, using the specified collation. " + "If either comparand is the empty sequence, the empty sequence is returned. " + THIRD_REL_COLLATION_ARG_EXAMPLE, new SequenceType[] { - new FunctionParameterSequenceType("string-1", Type.STRING, - Cardinality.ZERO_OR_ONE, "The first string"), - new FunctionParameterSequenceType("string-2", Type.STRING, - Cardinality.ZERO_OR_ONE, "The second string"), + new FunctionParameterSequenceType("value-1", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_ONE, "The first value"), + new FunctionParameterSequenceType("value-2", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_ONE, "The second value"), new FunctionParameterSequenceType("collation-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The relative collation URI") }, new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_ONE, - "-1 if $string-1 is inferior to $string-2, " + - "0 if $string-1 is equal to $string-2, " + - "1 if $string-1 is superior to $string-2. " + - "If either comparand is the empty sequence, the empty sequence is returned.")) + "-1, 0, or 1 depending on comparison result")) }; public FunCompare(XQueryContext context, FunctionSignature signature) { @@ -123,14 +121,132 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc return result; } - static int compare(final Item item1, final Item item2, @Nullable final Collator collator) throws XPathException { - final int comparison = Collations.compare(collator, item1.getStringValue(), item2.getStringValue()); - if (comparison == Constants.EQUAL) { + public static int compare(final Item item1, final Item item2, @Nullable final Collator collator) throws XPathException { + final AtomicValue v1 = item1.atomize(); + final AtomicValue v2 = item2.atomize(); + + // For string-like types, use collation-aware comparison + if (isStringLike(v1.getType()) && isStringLike(v2.getType())) { + return normalizeComparison(Collations.compare(collator, v1.getStringValue(), v2.getStringValue())); + } + + // XQ4 numeric total order: compare by exact mathematical magnitude + if (v1 instanceof NumericValue && v2 instanceof NumericValue) { + return numericTotalOrder((NumericValue) v1, (NumericValue) v2); + } + + // XQ4 duration total order: months first, then seconds + if (v1 instanceof DurationValue && v2 instanceof DurationValue) { + return durationTotalOrder((DurationValue) v1, (DurationValue) v2); + } + + // XQ4 date/time total order: normalize to millis for types where + // XMLGregorianCalendar.compare() may return INDETERMINATE + if (v1 instanceof AbstractDateTimeValue && v2 instanceof AbstractDateTimeValue + && v1.getType() == v2.getType()) { + return dateTimeTotalOrder((AbstractDateTimeValue) v1, (AbstractDateTimeValue) v2); + } + + // For other atomic types, use natural ordering via compareTo + return normalizeComparison(v1.compareTo(collator, v2)); + } + + /** + * XQ4 numeric total order for fn:compare. + * Float is promoted to double. NaN == NaN (and NaN < everything). + * -0.0 == +0.0. Doubles and decimals compared by exact mathematical magnitude. + */ + static int numericTotalOrder(final NumericValue v1, final NumericValue v2) throws XPathException { + // Promote float to double + final double d1 = v1.getDouble(); + final double d2 = v2.getDouble(); + + final boolean nan1 = Double.isNaN(d1); + final boolean nan2 = Double.isNaN(d2); + + // NaN equals NaN, NaN < everything else + if (nan1 && nan2) { return Constants.EQUAL; - } else if (comparison < 0) { + } + if (nan1) { return Constants.INFERIOR; - } else { + } + if (nan2) { return Constants.SUPERIOR; } + + // Handle infinities + if (Double.isInfinite(d1) || Double.isInfinite(d2)) { + if (d1 == d2) { + return Constants.EQUAL; + } + return d1 < d2 ? Constants.INFERIOR : Constants.SUPERIOR; + } + + // -0.0 == +0.0 + if (d1 == 0.0 && d2 == 0.0) { + return Constants.EQUAL; + } + + // Compare by exact mathematical magnitude using BigDecimal + final BigDecimal bd1 = toBigDecimal(v1); + final BigDecimal bd2 = toBigDecimal(v2); + return normalizeComparison(bd1.compareTo(bd2)); + } + + private static BigDecimal toBigDecimal(final NumericValue v) throws XPathException { + if (v instanceof org.exist.xquery.value.DecimalValue) { + return ((org.exist.xquery.value.DecimalValue) v).getValue(); + } + if (v instanceof IntegerValue) { + // Use string representation — getValue() truncates to long for big integers + return new BigDecimal(v.getStringValue()); + } + // Double or Float — use exact decimal representation (no rounding) + return new BigDecimal(v.getDouble()); + } + + /** + * XQ4 duration total order for fn:compare. + * Compares months component first, then seconds component. + * This provides a total order even for xs:duration values where + * months and seconds are both present (which XMLGregorianCalendar + * considers INDETERMINATE). + */ + static int durationTotalOrder(final DurationValue v1, final DurationValue v2) { + final BigInteger months1 = v1.monthsValueSigned(); + final BigInteger months2 = v2.monthsValueSigned(); + final int monthsCmp = months1.compareTo(months2); + if (monthsCmp != 0) { + return normalizeComparison(monthsCmp); + } + final BigDecimal seconds1 = v1.secondsValueSigned(); + final BigDecimal seconds2 = v2.secondsValueSigned(); + return normalizeComparison(seconds1.compareTo(seconds2)); + } + + /** + * XQ4 date/time total order for fn:compare. + * Uses getTimeInMillis() to normalize both values to a common + * representation, avoiding INDETERMINATE results from + * XMLGregorianCalendar.compare() on partial date/time types. + */ + static int dateTimeTotalOrder(final AbstractDateTimeValue v1, final AbstractDateTimeValue v2) { + final long ms1 = v1.getTimeInMillis(); + final long ms2 = v2.getTimeInMillis(); + return normalizeComparison(Long.compare(ms1, ms2)); + } + + private static int normalizeComparison(final int cmp) { + if (cmp == 0) { + return Constants.EQUAL; + } + return cmp < 0 ? Constants.INFERIOR : Constants.SUPERIOR; + } + + private static boolean isStringLike(final int type) { + return Type.subTypeOf(type, Type.STRING) + || type == Type.UNTYPED_ATOMIC + || Type.subTypeOf(type, Type.ANY_URI); } } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunConcat.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunConcat.java index c4046cead8f..9b654a42abb 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunConcat.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunConcat.java @@ -66,7 +66,7 @@ public class FunConcat extends Function { "is treated as the zero-length string.", new SequenceType[] { //More complicated : see below - new FunctionParameterSequenceType("atomizable-values", + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "The atomizable values") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunContainsToken.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunContainsToken.java index f675b3eaa87..cebd1da86f1 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunContainsToken.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunContainsToken.java @@ -42,9 +42,9 @@ public class FunContainsToken extends BasicFunction { private static final QName FS_CONTAINS_TOKEN_NAME = new QName("contains-token", Function.BUILTIN_FUNCTION_NS); - private final static FunctionParameterSequenceType FS_INPUT = optManyParam("input", Type.STRING, "The input string"); + private final static FunctionParameterSequenceType FS_INPUT = optManyParam("value", Type.STRING, "The input string"); private final static FunctionParameterSequenceType FS_TOKEN = param("token", Type.STRING, "The token to be searched for"); - private final static FunctionParameterSequenceType FS_COLLATION = param("pattern", Type.STRING, "Collation to use"); + private final static FunctionParameterSequenceType FS_COLLATION = param("collation", Type.STRING, "Collation to use"); public final static FunctionSignature[] FS_CONTAINS_TOKEN = functionSignatures( FS_CONTAINS_TOKEN_NAME, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCount.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCount.java index 3e9e3440b38..f884d060a93 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCount.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCount.java @@ -44,7 +44,7 @@ public class FunCount extends Function { new QName("count", Function.BUILTIN_FUNCTION_NS), "Returns the number of items in the argument sequence, $items.", new SequenceType[]{ - new FunctionParameterSequenceType("items", Type.ITEM, + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The items") }, new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunData.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunData.java index 8594c7ea63b..98bc2662253 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunData.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunData.java @@ -57,7 +57,7 @@ public class FunData extends Function { qnData, "Atomizes the sequence $items, replacing all nodes in the sequence by their typed values.", new SequenceType[] { - new FunctionParameterSequenceType("items", Type.ITEM, + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The items") }, new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, @@ -94,6 +94,8 @@ public Sequence eval(Sequence contextSequence, Item contextItem) } else { if (contextItem != null) { items = Atomize.atomize(contextItem.toSequence()); + } else if (contextSequence != null) { + items = Atomize.atomize(contextSequence); } else { items = Sequence.EMPTY_SEQUENCE; } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDeepEqual.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDeepEqual.java index 6e6e0285dc2..ae5d32a6917 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDeepEqual.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDeepEqual.java @@ -35,6 +35,7 @@ import org.exist.xquery.Dependency; import org.exist.xquery.Function; import org.exist.xquery.FunctionSignature; +import org.exist.xquery.InlineFunction; import org.exist.xquery.Profiler; import org.exist.xquery.ValueComparison; import org.exist.xquery.XPathException; @@ -43,6 +44,7 @@ import org.exist.xquery.functions.map.AbstractMapType; import org.exist.xquery.value.AtomicValue; import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionReference; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.Item; @@ -55,6 +57,8 @@ import org.w3c.dom.Node; import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; /** * Implements the fn:deep-equal library function. @@ -72,9 +76,9 @@ public class FunDeepEqual extends CollatingFunction { "at the same position in $items-2, false() otherwise. " + "If both $items-1 and $items-2 are the empty sequence, returns true(). ", new SequenceType[] { - new FunctionParameterSequenceType("items-1", Type.ITEM, + new FunctionParameterSequenceType("input1", Type.ITEM, Cardinality.ZERO_OR_MORE, "The first item sequence"), - new FunctionParameterSequenceType("items-2", Type.ITEM, + new FunctionParameterSequenceType("input2", Type.ITEM, Cardinality.ZERO_OR_MORE, "The second item sequence") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, @@ -85,14 +89,14 @@ public class FunDeepEqual extends CollatingFunction { "Returns true() iff every item in $items-1 is deep-equal to the item " + "at the same position in $items-2, false() otherwise. " + "If both $items-1 and $items-2 are the empty sequence, returns true(). " + - "Comparison collation is specified by $collation-uri. " + + "Comparison collation is specified by $collation-uri. " + THIRD_REL_COLLATION_ARG_EXAMPLE, new SequenceType[] { - new FunctionParameterSequenceType("items-1", Type.ITEM, + new FunctionParameterSequenceType("input1", Type.ITEM, Cardinality.ZERO_OR_MORE, "The first item sequence"), - new FunctionParameterSequenceType("items-2", Type.ITEM, + new FunctionParameterSequenceType("input2", Type.ITEM, Cardinality.ZERO_OR_MORE, "The second item sequence"), - new FunctionParameterSequenceType("collation-uri", Type.STRING, + new FunctionParameterSequenceType("options", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, @@ -225,6 +229,29 @@ public static int deepCompare(final Item item1, final Item item2, @Nullable fina } } + // XQ4: Function items compared by function-identity semantics + if (Type.subTypeOf(item1.getType(), Type.FUNCTION) || Type.subTypeOf(item2.getType(), Type.FUNCTION)) { + if (!Type.subTypeOf(item1.getType(), Type.FUNCTION) || !Type.subTypeOf(item2.getType(), Type.FUNCTION)) { + return Constants.INFERIOR; + } + if (item1 == item2) { + return Constants.EQUAL; + } + // Named functions with same name and arity are equal + if (item1 instanceof FunctionReference ref1 && item2 instanceof FunctionReference ref2) { + final org.exist.dom.QName name1 = ref1.getSignature().getName(); + final org.exist.dom.QName name2 = ref2.getSignature().getName(); + if (name1 != null && name2 != null + && name1 != InlineFunction.INLINE_FUNCTION_QNAME + && name2 != InlineFunction.INLINE_FUNCTION_QNAME + && name1.equals(name2) + && ref1.getSignature().getArgumentCount() == ref2.getSignature().getArgumentCount()) { + return Constants.EQUAL; + } + } + return Constants.INFERIOR; + } + final boolean item1IsAtomic = Type.subTypeOf(item1.getType(), Type.ANY_ATOMIC_TYPE); final boolean item2IsAtomic = Type.subTypeOf(item2.getType(), Type.ANY_ATOMIC_TYPE); if (item1IsAtomic || item2IsAtomic) { @@ -370,44 +397,71 @@ private static int compareElements(final Node a, final Node b, @Nullable final C } private static int compareContents(Node a, Node b, @Nullable final Collator collator) { - a = findNextTextOrElementNode(a.getFirstChild()); - b = findNextTextOrElementNode(b.getFirstChild()); - while (!(a == null || b == null)) { - final int nodeTypeA = getEffectiveNodeType(a); - final int nodeTypeB = getEffectiveNodeType(b); - if (nodeTypeA != nodeTypeB) { - return Constants.INFERIOR; - } - switch (nodeTypeA) { - case Node.TEXT_NODE: - final String nodeValueA = getNodeValue(a); - final String nodeValueB = getNodeValue(b); - final int textComparison = safeCompare(nodeValueA, nodeValueB, collator); + // XQ4: merge adjacent text nodes (split by ignored comments/PIs) + final List childrenA = mergeTextNodes(a); + final List childrenB = mergeTextNodes(b); + + if (childrenA.size() != childrenB.size()) { + return childrenA.size() < childrenB.size() ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (int i = 0; i < childrenA.size(); i++) { + final Object ca = childrenA.get(i); + final Object cb = childrenB.get(i); + + if (ca instanceof String sa && cb instanceof String sb) { + final int textComparison = safeCompare(sa, sb, collator); if (textComparison != Constants.EQUAL) { return textComparison; } - break; - case Node.ELEMENT_NODE: - final int elementComparison = compareElements(a, b, collator); - if (elementComparison != Constants.EQUAL) { - return elementComparison; + } else if (ca instanceof Node na && cb instanceof Node nb) { + if (getEffectiveNodeType(na) != getEffectiveNodeType(nb)) { + return Constants.INFERIOR; } - break; - default: - throw new RuntimeException("unexpected node type " + nodeTypeA); + if (getEffectiveNodeType(na) == Node.ELEMENT_NODE) { + final int cmp = compareElements(na, nb, collator); + if (cmp != Constants.EQUAL) { + return cmp; + } + } else { + throw new RuntimeException("unexpected node type " + getEffectiveNodeType(na)); + } + } else { + return Constants.INFERIOR; } - a = findNextTextOrElementNode(a.getNextSibling()); - b = findNextTextOrElementNode(b.getNextSibling()); } + return Constants.EQUAL; + } - // NOTE(AR): intentional reference equality check - if (a == b) { - return Constants.EQUAL; // both null - } else if (a == null) { - return Constants.INFERIOR; - } else { - return Constants.SUPERIOR; + /** + * Collect significant children (text + element nodes), merging adjacent + * text nodes that result from skipping comments/PIs. + */ + private static List mergeTextNodes(final Node parent) { + final List result = new ArrayList<>(); + StringBuilder currentText = null; + Node child = parent.getFirstChild(); + while (child != null) { + final int nodeType = getEffectiveNodeType(child); + if (nodeType == Node.TEXT_NODE) { + if (currentText == null) { + currentText = new StringBuilder(); + } + currentText.append(getNodeValue(child)); + } else if (nodeType == Node.ELEMENT_NODE) { + if (currentText != null) { + result.add(currentText.toString()); + currentText = null; + } + result.add(child); + } + // Skip comments, PIs, and other non-significant nodes + child = child.getNextSibling(); } + if (currentText != null) { + result.add(currentText.toString()); + } + return result; } private static String getNodeValue(final Node n) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDistinctValues.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDistinctValues.java index 155b654ce4c..3f7c59d9408 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDistinctValues.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDistinctValues.java @@ -66,7 +66,7 @@ public class FunDistinctValues extends CollatingFunction { "Returns a sequence where duplicate values of $atomic-values, " + "based on value equality, have been deleted.", new SequenceType[] { - new FunctionParameterSequenceType("atomic-values", Type.ANY_ATOMIC_TYPE, + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The atomic values") }, new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, @@ -75,12 +75,12 @@ public class FunDistinctValues extends CollatingFunction { new FunctionSignature( new QName("distinct-values", Function.BUILTIN_FUNCTION_NS, FnModule.PREFIX), "Returns a sequence where duplicate values of $atomic-values, " + - "based on value equality specified by collation $collation-uri, " + + "based on value equality specified by collation $collation-uri, " + "have been deleted.", - new SequenceType[] { - new FunctionParameterSequenceType("atomic-values", Type.ANY_ATOMIC_TYPE, + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The atomic values"), - new FunctionParameterSequenceType("collation-uri", Type.STRING, + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI") }, new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDoc.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDoc.java index 65b663f4848..365e6d11cff 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDoc.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDoc.java @@ -57,7 +57,7 @@ public class FunDoc extends Function { "Returns the document node of $document-uri. " + XMLDBModule.ANY_URI, new SequenceType[] { - new FunctionParameterSequenceType("document-uri", Type.STRING, + new FunctionParameterSequenceType("source", Type.STRING, Cardinality.ZERO_OR_ONE, "The document URI") }, new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocAvailable.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocAvailable.java index be44565be0a..2f4cfbee3df 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocAvailable.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocAvailable.java @@ -56,7 +56,7 @@ public class FunDocAvailable extends Function { "specified in the input sequence is available. " + XMLDBModule.ANY_URI, new SequenceType[]{ - new FunctionParameterSequenceType("document-uri", Type.STRING, + new FunctionParameterSequenceType("source", Type.STRING, Cardinality.ZERO_OR_ONE, "The document URI") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocumentURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocumentURI.java index b052164104e..1c6b6c28e1c 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocumentURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunDocumentURI.java @@ -37,7 +37,7 @@ */ public class FunDocumentURI extends Function { - private static final FunctionParameterSequenceType FS_PARAM_NODE = optParam("value", Type.NODE, "The document node."); + private static final FunctionParameterSequenceType FS_PARAM_NODE = optParam("node", Type.NODE, "The document node."); private static final String FS_DOCUMENT_URI = "document-uri"; private static final String FS_DESCRIPTION = "Returns the URI of a resource where a document can be found, if available."; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunElementWithId.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunElementWithId.java index dd289251062..00b4a363ec1 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunElementWithId.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunElementWithId.java @@ -44,7 +44,7 @@ public class FunElementWithId extends BasicFunction { "matching the value of one or more of the IDREF values supplied in $idrefs. " + "If none is matching or $idrefs is the empty sequence, returns the empty sequence."; private static final FunctionReturnSequenceType FN_RETURN = returnsOptMany(Type.STRING, "the elements with IDs matching IDREFs from $idref-sequence"); - private static final FunctionParameterSequenceType PARAM_ID_REFS_STRING = optManyParam("idrefs", Type.STRING, "The IDREF sequence"); + private static final FunctionParameterSequenceType PARAM_ID_REFS_STRING = optManyParam("values", Type.STRING, "The IDREF sequence"); public static final FunctionSignature[] FS_ELEMENT_WITH_ID_SIGNATURES = functionSignatures( FN_NAME, FN_DESCRIPTION, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEmpty.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEmpty.java index 5aaf2d72c28..7ca39701335 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEmpty.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEmpty.java @@ -47,7 +47,7 @@ public class FunEmpty extends Function { new QName("empty", Function.BUILTIN_FUNCTION_NS), "Returns true() if the value of $items is the empty sequence, false() otherwise.", new SequenceType[] { - new FunctionParameterSequenceType("items", Type.ITEM, + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The item sequence") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEncodeForURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEncodeForURI.java index 3e2a56b4b0b..14e312160f9 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEncodeForURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEncodeForURI.java @@ -51,7 +51,7 @@ public class FunEncodeForURI extends Function { "with its percent-encoded form as described in [RFC 3986]. " + "If $uri-part is the empty sequence, returns the zero-length string.", new SequenceType[] { - new FunctionParameterSequenceType("uri-part", Type.STRING, + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI part to encode") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunError.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunError.java index 1643d4e5ac7..2c92e096c56 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunError.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunError.java @@ -61,7 +61,7 @@ public class FunError extends BasicFunction { "The script will terminate immediately with an exception using " + "$qname and the default message, 'An error has been raised by the query'.", new SequenceType[] { - new FunctionParameterSequenceType("qname", Type.QNAME, + new FunctionParameterSequenceType("code", Type.QNAME, Cardinality.ZERO_OR_ONE, "The qname") }, new SequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE) @@ -72,9 +72,9 @@ public class FunError extends BasicFunction { "The script will terminate immediately with an exception using " + "$qname and $message.", new SequenceType[] { - new FunctionParameterSequenceType("qname", Type.QNAME, + new FunctionParameterSequenceType("code", Type.QNAME, Cardinality.ZERO_OR_ONE, "The qname"), - new FunctionParameterSequenceType("message", Type.STRING, + new FunctionParameterSequenceType("description", Type.STRING, Cardinality.EXACTLY_ONE, "The message") }, new SequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE)), @@ -84,11 +84,11 @@ public class FunError extends BasicFunction { "The script will terminate immediately with an exception using " + "$qname and $message with $error-object appended.", new SequenceType[] { - new FunctionParameterSequenceType("qname", Type.QNAME, + new FunctionParameterSequenceType("code", Type.QNAME, Cardinality.ZERO_OR_ONE, "The qname"), - new FunctionParameterSequenceType("message", Type.STRING, + new FunctionParameterSequenceType("description", Type.STRING, Cardinality.EXACTLY_ONE, "The message"), - new FunctionParameterSequenceType("error-object", Type.ITEM, + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_MORE, "The error object") }, new SequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE)), diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEscapeHTMLURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEscapeHTMLURI.java index 4ed6aa72956..ff81f77e4f6 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEscapeHTMLURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunEscapeHTMLURI.java @@ -53,7 +53,7 @@ public class FunEscapeHTMLURI extends Function { "in the form %XX. If $html-uri is the empty sequence, " + "returns the zero-length string.", new SequenceType[] { - new FunctionParameterSequenceType("html-uri", Type.STRING, + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The html URI") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExactlyOne.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExactlyOne.java index 588f0b47206..b0f25f0dc06 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExactlyOne.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExactlyOne.java @@ -53,7 +53,7 @@ public class FunExactlyOne extends Function { "Returns the argument sequence, $items, if it contains exactly one item. Otherwise, " + "raises an error.", new SequenceType[] { - new FunctionParameterSequenceType("items", Type.ITEM, Cardinality.ZERO_OR_MORE, "The item sequence") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The item sequence") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.EXACTLY_ONE, "the sole item in $items if it contains exactly one item. Otherwise, an error is raised.")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExists.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExists.java index b6c298bc4ca..fd25e9ba8e8 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExists.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunExists.java @@ -50,7 +50,7 @@ public class FunExists extends Function { "Returns true if the argument $items is not the empty sequence, " + "false otherwise.", new SequenceType[] { - new FunctionParameterSequenceType("items", Type.ITEM, Cardinality.ZERO_OR_MORE, "The item sequence") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The item sequence") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true() if not the empty-sequence, false() otherwise")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunFloor.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunFloor.java index 83a84f9084b..6edc84bbf88 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunFloor.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunFloor.java @@ -45,7 +45,7 @@ public class FunFloor extends Function { "Returns the largest number not greater than the value of $number. " + "If $number is the empty sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("number", Type.NUMERIC, Cardinality.ZERO_OR_ONE, "The number") + new FunctionParameterSequenceType("value", Type.NUMERIC, Cardinality.ZERO_OR_ONE, "The number") }, new FunctionReturnSequenceType(Type.NUMERIC, Cardinality.ZERO_OR_ONE, "the largest number without fraction part not greater than the value of $number")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDateComponent.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDateComponent.java index 983d32d0c42..e389fbbfe9a 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDateComponent.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDateComponent.java @@ -50,9 +50,9 @@ */ public class FunGetDateComponent extends BasicFunction { protected static final Logger logger = LogManager.getLogger(FunGetDateComponent.class); - public final static FunctionParameterSequenceType DATE_01_PARAM = new FunctionParameterSequenceType("date", Type.DATE, Cardinality.ZERO_OR_ONE, "The date as xs:date"); - public final static FunctionParameterSequenceType TIME_01_PARAM = new FunctionParameterSequenceType("time", Type.TIME, Cardinality.ZERO_OR_ONE, "The time as xs:time"); - public final static FunctionParameterSequenceType DATE_TIME_01_PARAM = new FunctionParameterSequenceType("date-time", Type.DATE_TIME, Cardinality.ZERO_OR_ONE, "The date-time as xs:dateTime"); + public final static FunctionParameterSequenceType DATE_01_PARAM = new FunctionParameterSequenceType("value", Type.DATE, Cardinality.ZERO_OR_ONE, "The date as xs:date"); + public final static FunctionParameterSequenceType TIME_01_PARAM = new FunctionParameterSequenceType("value", Type.TIME, Cardinality.ZERO_OR_ONE, "The time as xs:time"); + public final static FunctionParameterSequenceType DATE_TIME_01_PARAM = new FunctionParameterSequenceType("value", Type.DATE_TIME, Cardinality.ZERO_OR_ONE, "The date-time as xs:dateTime"); // ----- fromDate public final static FunctionSignature fnDayFromDate = diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDurationComponent.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDurationComponent.java index b2f3b92d60a..ac728e1f1eb 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDurationComponent.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunGetDurationComponent.java @@ -54,9 +54,9 @@ */ public class FunGetDurationComponent extends BasicFunction { protected static final Logger logger = LogManager.getLogger(FunGetDurationComponent.class); - public final static FunctionParameterSequenceType DAYTIME_DURA_01_PARAM = new FunctionParameterSequenceType("duration", Type.DAY_TIME_DURATION, Cardinality.ZERO_OR_ONE, "The duration as xs:dayTimeDuration"); - public final static FunctionParameterSequenceType YEARMONTH_DURA_01_PARAM = new FunctionParameterSequenceType("duration", Type.YEAR_MONTH_DURATION, Cardinality.ZERO_OR_ONE, "The duration as xs:yearMonthDuration"); - public final static FunctionParameterSequenceType DURA_01_PARAM = new FunctionParameterSequenceType("duration", Type.DURATION, Cardinality.ZERO_OR_ONE, "The duration as xs:duration"); + public final static FunctionParameterSequenceType DAYTIME_DURA_01_PARAM = new FunctionParameterSequenceType("value", Type.DAY_TIME_DURATION, Cardinality.ZERO_OR_ONE, "The duration as xs:dayTimeDuration"); + public final static FunctionParameterSequenceType YEARMONTH_DURA_01_PARAM = new FunctionParameterSequenceType("value", Type.YEAR_MONTH_DURATION, Cardinality.ZERO_OR_ONE, "The duration as xs:yearMonthDuration"); + public final static FunctionParameterSequenceType DURA_01_PARAM = new FunctionParameterSequenceType("value", Type.DURATION, Cardinality.ZERO_OR_ONE, "The duration as xs:duration"); public final static FunctionSignature fnDaysFromDuration = new FunctionSignature( diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunHeadTail.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunHeadTail.java index c01a4110863..bd3949aca2a 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunHeadTail.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunHeadTail.java @@ -35,27 +35,46 @@ import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; public class FunHeadTail extends BasicFunction { - public final static FunctionSignature[] signatures = { - new FunctionSignature( - new QName("head", Function.BUILTIN_FUNCTION_NS), - "The function returns the value of the expression $arg[1], i.e. the first item in the " + - "passed in sequence.", - new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "") - }, - new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_ONE, "the first item or the empty sequence")), - new FunctionSignature( - new QName("tail", Function.BUILTIN_FUNCTION_NS), - "The function returns the value of the expression subsequence($sequence, 2), i.e. a new sequence containing " + - "all items of the input sequence except the first.", - new SequenceType[] { - new FunctionParameterSequenceType("sequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The source sequence") - }, - new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the resulting sequence")) }; - + public final static FunctionSignature FN_HEAD = new FunctionSignature( + new QName("head", Function.BUILTIN_FUNCTION_NS), + "The function returns the value of the expression $arg[1], i.e. the first item in the " + + "passed in sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_ONE, "the first item or the empty sequence")); + + public final static FunctionSignature FN_TAIL = new FunctionSignature( + new QName("tail", Function.BUILTIN_FUNCTION_NS), + "The function returns the value of the expression subsequence($sequence, 2), i.e. a new sequence containing " + + "all items of the input sequence except the first.", + new SequenceType[] { + new FunctionParameterSequenceType("sequence", Type.ITEM, Cardinality.ZERO_OR_MORE, "The source sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the resulting sequence")); + + public final static FunctionSignature FN_FOOT = new FunctionSignature( + new QName("foot", Function.BUILTIN_FUNCTION_NS), + "Returns the last item in a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_ONE, "the last item or the empty sequence")); + + public final static FunctionSignature FN_TRUNK = new FunctionSignature( + new QName("trunk", Function.BUILTIN_FUNCTION_NS), + "Returns all but the last item in a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "all items except the last")); + + public final static FunctionSignature[] signatures = { FN_HEAD, FN_TAIL, FN_FOOT, FN_TRUNK }; + public FunHeadTail(XQueryContext context, FunctionSignature signature) { super(context, signature); } @@ -64,24 +83,36 @@ public FunHeadTail(XQueryContext context, FunctionSignature signature) { public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { super.analyze(contextInfo); if (getContext().getXQueryVersion()<30) { - throw new XPathException(this, ErrorCodes.EXXQDY0003, "Function " + + throw new XPathException(this, ErrorCodes.EXXQDY0003, "Function " + getSignature().getName() + " is only supported for xquery version \"3.0\" and later."); } } - + @Override public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { final Sequence seq = args[0]; - Sequence tmp; if (seq.isEmpty()) { - tmp = Sequence.EMPTY_SEQUENCE; - } else if (isCalledAs("head")) { - tmp = seq.itemAt(0).toSequence(); + return Sequence.EMPTY_SEQUENCE; + } + if (isCalledAs("head")) { + return seq.itemAt(0).toSequence(); + } else if (isCalledAs("tail")) { + return seq.tail(); + } else if (isCalledAs("foot")) { + return seq.itemAt(seq.getItemCount() - 1).toSequence(); } else { - tmp = seq.tail(); + // trunk: all items except the last + final int count = seq.getItemCount(); + if (count <= 1) { + return Sequence.EMPTY_SEQUENCE; + } + final ValueSequence result = new ValueSequence(count - 1); + for (int i = 0; i < count - 1; i++) { + result.add(seq.itemAt(i)); + } + return result; } - return tmp; } } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIRIToURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIRIToURI.java index bb94db1500a..b9f30a6480b 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIRIToURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIRIToURI.java @@ -80,7 +80,7 @@ public class FunIRIToURI extends Function { new FunctionSignature( new QName("iri-to-uri", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION, - new SequenceType[] { new FunctionParameterSequenceType("iri", Type.STRING, Cardinality.ZERO_OR_ONE, "The IRI") }, + new SequenceType[] { new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The IRI") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the URI")); public FunIRIToURI(XQueryContext context, FunctionSignature signature) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunId.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunId.java index ba330cfb4c4..4eb25770fc3 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunId.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunId.java @@ -52,7 +52,7 @@ public class FunId extends Function { "matching the value of one or more of the IDREF values supplied in $idrefs. " + "If none is matching or $idrefs is the empty sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("idrefs", Type.STRING, Cardinality.ZERO_OR_MORE, "The IDREF sequence") + new FunctionParameterSequenceType("values", Type.STRING, Cardinality.ZERO_OR_MORE, "The IDREF sequence") }, new FunctionReturnSequenceType(Type.ELEMENT, Cardinality.ZERO_OR_MORE, "the elements with IDs matching IDREFs from $idref-sequence")), new FunctionSignature( @@ -61,8 +61,8 @@ public class FunId extends Function { "matching the value of one or more of the IDREF values supplied in $idrefs and is in the same document as $node-in-document. " + "If none is matching or $idrefs is the empty sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("idrefs", Type.STRING, Cardinality.ZERO_OR_MORE, "The IDREF sequence"), - new FunctionParameterSequenceType("node-in-document", Type.NODE, Cardinality.EXACTLY_ONE, "The node in document") + new FunctionParameterSequenceType("values", Type.STRING, Cardinality.ZERO_OR_MORE, "The IDREF sequence"), + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.EXACTLY_ONE, "The node in document") }, new FunctionReturnSequenceType(Type.ELEMENT, Cardinality.ZERO_OR_MORE, "the elements with IDs matching IDREFs from $idrefs in the same document as $node-in-document")) }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIdRef.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIdRef.java index 19637562f7e..b77fee3321f 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIdRef.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIdRef.java @@ -72,7 +72,7 @@ public class FunIdRef extends Function { "value of one or more of the ID values supplied in $ids. " + "If none is matching or $ids is the empty sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("ids", Type.STRING, Cardinality.ZERO_OR_MORE, "The ID sequence"), + new FunctionParameterSequenceType("values", Type.STRING, Cardinality.ZERO_OR_MORE, "The ID sequence"), }, new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the elements with matching IDREF values from IDs in $ids")), @@ -82,8 +82,8 @@ public class FunIdRef extends Function { "value of one or more of the ID values supplied in $ids. " + "If none is matching or $ids is the empty sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("ids", Type.STRING, Cardinality.ZERO_OR_MORE, "The ID sequence"), - new FunctionParameterSequenceType("node-in-document", Type.NODE, Cardinality.EXACTLY_ONE, "The node in document") + new FunctionParameterSequenceType("values", Type.STRING, Cardinality.ZERO_OR_MORE, "The ID sequence"), + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.EXACTLY_ONE, "The node in document") }, new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the elements with matching IDREF values from IDs in $ids in the same document as $node-in-document")) }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIndexOf.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIndexOf.java index a20afb4774a..d804703c9f5 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIndexOf.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunIndexOf.java @@ -52,11 +52,11 @@ public class FunIndexOf extends BasicFunction { protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, "the sequence of positive integers giving the positions within the sequence"); - protected static final FunctionParameterSequenceType COLLATION_PARAM = new FunctionParameterSequenceType("collation-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI"); + protected static final FunctionParameterSequenceType COLLATION_PARAM = new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI"); - protected static final FunctionParameterSequenceType SEARCH_PARAM = new FunctionParameterSequenceType("search", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The search component"); + protected static final FunctionParameterSequenceType SEARCH_PARAM = new FunctionParameterSequenceType("target", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The search component"); - protected static final FunctionParameterSequenceType SEQ_PARAM = new FunctionParameterSequenceType("source", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The source sequence"); + protected static final FunctionParameterSequenceType SEQ_PARAM = new FunctionParameterSequenceType("input", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The source sequence"); protected static final String FUNCTION_DESCRIPTION = diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunInsertBefore.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunInsertBefore.java index 751c7d358d3..91ec4974205 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunInsertBefore.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunInsertBefore.java @@ -69,9 +69,9 @@ public class FunInsertBefore extends Function { new QName("insert-before", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION, new SequenceType[] { - new FunctionParameterSequenceType("target", Type.ITEM, Cardinality.ZERO_OR_MORE, "The target"), + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The target"), new FunctionParameterSequenceType("position", Type.INTEGER, Cardinality.EXACTLY_ONE, "The position to insert before"), - new FunctionParameterSequenceType("inserts", Type.ITEM, Cardinality.ZERO_OR_MORE, "The data to insert") + new FunctionParameterSequenceType("insert", Type.ITEM, Cardinality.ZERO_OR_MORE, "The data to insert") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the new sequence")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLang.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLang.java index 49ad0f49061..7d25ec83e4e 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLang.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLang.java @@ -77,7 +77,7 @@ public class FunLang extends Function { new QName("lang", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION_1_PARAM + FUNCTION_DESCRIPTION_BOTH, new SequenceType[] { - new FunctionParameterSequenceType("lang", Type.STRING, Cardinality.ZERO_OR_ONE, "The language code") + new FunctionParameterSequenceType("language", Type.STRING, Cardinality.ZERO_OR_ONE, "The language code") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if the language code matches, false otherwise") ), @@ -85,7 +85,7 @@ public class FunLang extends Function { new QName("lang", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION_2_PARAMS + FUNCTION_DESCRIPTION_BOTH, new SequenceType[] { - new FunctionParameterSequenceType("lang", Type.STRING, Cardinality.ZERO_OR_ONE, "The language code"), + new FunctionParameterSequenceType("language", Type.STRING, Cardinality.ZERO_OR_ONE, "The language code"), new FunctionParameterSequenceType("node", Type.NODE, Cardinality.EXACTLY_ONE, "The node") }, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if the language code matches, false otherwise") diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLocalName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLocalName.java index a9c0392cbf5..fc3d7788f6d 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLocalName.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunLocalName.java @@ -74,7 +74,7 @@ public class FunLocalName extends Function { new QName("local-name", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION, new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.NODE, Cardinality.ZERO_OR_ONE, "The node to retrieve the local name from") + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.ZERO_OR_ONE, "The node to retrieve the local name from") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the local name") ) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java index 6f06bd772ce..52e0b4187fd 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java @@ -61,9 +61,10 @@ */ public final class FunMatches extends Function implements Optimizable, IndexUseReporter { - private static final FunctionParameterSequenceType FS_PARAM_INPUT = optParam("input", Type.STRING, "The input string"); + private static final FunctionParameterSequenceType FS_PARAM_INPUT = optParam("value", Type.STRING, "The input string"); private static final FunctionParameterSequenceType FS_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern"); - private static final FunctionParameterSequenceType FS_PARAM_FLAGS = param("flags", Type.STRING, "The flags"); + private static final FunctionParameterSequenceType FS_PARAM_FLAGS = + new FunctionParameterSequenceType("flags", Type.STRING, Cardinality.ZERO_OR_ONE, "The flags"); private static final String FS_MATCHES_NAME = "matches"; private static final String FS_DESCRIPTION = @@ -138,7 +139,7 @@ public void setArguments(final List arguments) throws XPathException if (arguments.size() >= 3) { Expression arg = arguments.get(2); - arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg, + arg = new DynamicCardinalityCheck(context, Cardinality.ZERO_OR_ONE, arg, new Error(Error.FUNC_PARAM_CARDINALITY, "3", getSignature())); if (!Type.subTypeOf(arg.returnsType(), Type.ANY_ATOMIC_TYPE)) { arg = new Atomize(context, arg); @@ -212,7 +213,8 @@ public NodeSet preSelect(final Sequence contextSequence, final boolean useContex final int flags; if (getSignature().getArgumentCount() == 3) { - final String flagsArg = getArgument(2).eval(contextSequence, null).getStringValue(); + final Sequence flagsSeq = getArgument(2).eval(contextSequence, null); + final String flagsArg = flagsSeq.isEmpty() ? "" : flagsSeq.getStringValue(); flags = parseFlags(this, flagsArg); } else { flags = 0; @@ -382,7 +384,8 @@ private Sequence evalWithIndex(final Sequence contextSequence, final Item contex final int flags; if (getSignature().getArgumentCount() == 3) { - final String flagsArg = getArgument(2).eval(contextSequence, contextItem).getStringValue(); + final Sequence flagsSeq = getArgument(2).eval(contextSequence, contextItem); + final String flagsArg = flagsSeq.isEmpty() ? "" : flagsSeq.getStringValue(); flags = parseFlags(this, flagsArg); } else { flags = 0; @@ -497,7 +500,8 @@ private Sequence evalGeneric(final Sequence contextSequence, final Item contextI final String xmlRegexFlags; if (getSignature().getArgumentCount() == 3) { - xmlRegexFlags = getArgument(2).eval(contextSequence, contextItem).getStringValue(); + final Sequence flagsSeq = getArgument(2).eval(contextSequence, contextItem); + xmlRegexFlags = flagsSeq.isEmpty() ? "" : flagsSeq.getStringValue(); } else { xmlRegexFlags = ""; } @@ -512,7 +516,16 @@ private Sequence evalGeneric(final Sequence contextSequence, final Item contextI } - private boolean matchXmlRegex(final String string, final String pattern, final String flags) throws XPathException { + private boolean matchXmlRegex(String string, String pattern, String flags) throws XPathException { + // XQ4: 'c' flag — strip regex comments before compilation + final boolean hasCommentFlag = flags.indexOf('c') >= 0 && flags.indexOf('q') < 0; + if (flags.indexOf('c') >= 0) { + flags = flags.replace("c", ""); + } + if (hasCommentFlag) { + pattern = FunReplace.stripRegexComments(pattern); + } + try { List warnings = new ArrayList<>(1); RegularExpression regex = context.getBroker().getBrokerPool() @@ -526,6 +539,18 @@ private boolean matchXmlRegex(final String string, final String pattern, final S return regex.containsMatch(string); } catch (final net.sf.saxon.trans.XPathException e) { + // Fallback: if the pattern uses \p{Is} Unicode block names that + // Saxon doesn't recognize, convert to Java's \p{In} and use Java regex + if (pattern.contains("\\p{Is") || pattern.contains("\\P{Is")) { + try { + final String javaPattern = org.exist.xquery.regex.RegexUtil.translateRegexp( + this, pattern, flags.contains("x"), flags.contains("i")); + int javaFlags = org.exist.xquery.regex.RegexUtil.parseFlags(this, flags); + return Pattern.compile(javaPattern, javaFlags).matcher(string).find(); + } catch (final XPathException | PatternSyntaxException ignored) { + // fallback failed, throw original Saxon error + } + } switch (e.getErrorCodeLocalPart()) { case "FORX0001" -> throw new XPathException(this, ErrorCodes.FORX0001, "Invalid regular expression: " + e.getMessage()); case "FORX0002" -> throw new XPathException(this, ErrorCodes.FORX0002, "Invalid regular expression: " + e.getMessage()); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMax.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMax.java index 31fe3ee95b3..41e356645b1 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMax.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMax.java @@ -23,8 +23,8 @@ import com.ibm.icu.text.Collator; import org.exist.dom.QName; -import org.exist.util.Collations; import org.exist.xquery.Cardinality; +import org.exist.xquery.Constants; import org.exist.xquery.Dependency; import org.exist.xquery.ErrorCodes; import org.exist.xquery.Function; @@ -33,191 +33,145 @@ import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.AtomicValue; -import org.exist.xquery.value.ComputableValue; -import org.exist.xquery.value.DoubleValue; import org.exist.xquery.value.DurationValue; -import org.exist.xquery.value.FloatValue; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.Item; import org.exist.xquery.value.NumericValue; -import org.exist.xquery.value.QNameValue; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceIterator; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; /** + * Implementation of fn:max with XQuery 4.0 semantics. + * Uses fn:compare-based mutual comparability (XQ4 numeric total order, + * duration total order, date/time total order). + * * @author Wolfgang Meier */ public class FunMax extends CollatingFunction { - protected static final String FUNCTION_DESCRIPTION_COMMON_1 = - "Selects an item from the input sequence $arg whose value is " + - "greater than or equal to the value of every other item in the " + - "input sequence. If there are two or more such items, then the " + - "specific item whose value is returned is implementation dependent.\n\n" + - "The following rules are applied to the input sequence:\n\n" + - "- Values of type xs:untypedAtomic in $arg are cast to xs:double.\n" + - "- Numeric and xs:anyURI values are converted to the least common " + - "type that supports the 'ge' operator by a combination of type " + - "promotion and subtype substitution. See Section B.1 Type " + - "PromotionXP and Section B.2 Operator MappingXP.\n\n" + - "The items in the resulting sequence may be reordered in an arbitrary " + - "order. The resulting sequence is referred to below as the converted " + - "sequence. This function returns an item from the converted sequence " + - "rather than the input sequence.\n\n" + - "If the converted sequence is empty, the empty sequence is returned.\n\n" + - "All items in $arg must be numeric or derived from a single base type " + - "for which the 'ge' operator is defined. In addition, the values in the " + - "sequence must have a total order. If date/time values do not have a " + - "timezone, they are considered to have the implicit timezone provided " + - "by the dynamic context for purposes of comparison. Duration values " + - "must either all be xs:yearMonthDuration values or must all be " + - "xs:dayTimeDuration values.\n\n" + - "If any of these conditions is not met, then a type error is raised [err:FORG0006].\n\n" + - "If the converted sequence contains the value NaN, the value NaN is returned.\n\n" + - "If the items in the value of $arg are of type xs:string or types " + - "derived by restriction from xs:string, then the determination of " + - "the item with the largest value is made according to the collation " + - "that is used."; - protected static final String FUNCTION_DESCRIPTION_2_PARAM = - "If the type of the items in $arg is not xs:string " + - "and $collation-uri is specified, the collation is ignored.\n\n"; - protected static final String FUNCTION_DESCRIPTION_COMMON_2 = - "The collation used by the invocation of this function is " + - "determined according to the rules in 7.3.1 Collations."; - - - public final static FunctionSignature[] signatures = { - new FunctionSignature( - new QName("max", Function.BUILTIN_FUNCTION_NS), - FUNCTION_DESCRIPTION_COMMON_1 + - FUNCTION_DESCRIPTION_COMMON_2, - new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input sequence") - }, - new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "the max value") - ), - new FunctionSignature( - new QName("max", Function.BUILTIN_FUNCTION_NS), - FUNCTION_DESCRIPTION_COMMON_1 + FUNCTION_DESCRIPTION_2_PARAM + - FUNCTION_DESCRIPTION_COMMON_2, - new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input sequence"), - new FunctionParameterSequenceType("collation-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI") - }, - new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "the max value") - ) - }; - - public FunMax(XQueryContext context, FunctionSignature signature) { - super(context, signature); - } - - /* (non-Javadoc) - * @see org.exist.xquery.Expression#eval(org.exist.dom.persistent.DocumentSet, org.exist.xquery.value.Sequence, org.exist.xquery.value.Item) - */ - public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { + public final static FunctionSignature[] signatures = { + new FunctionSignature( + new QName("max", Function.BUILTIN_FUNCTION_NS), + "Returns the maximum value from the input sequence, using XQ4 comparison semantics.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, + "the maximum value") + ), + new FunctionSignature( + new QName("max", Function.BUILTIN_FUNCTION_NS), + "Returns the maximum value from the input sequence, using the specified collation.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, + Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, + "the maximum value") + ) + }; + + public FunMax(XQueryContext context, FunctionSignature signature) { + super(context, signature); + } + + public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { if (context.getProfiler().isEnabled()) { - context.getProfiler().start(this); - context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies())); + context.getProfiler().start(this); + context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", + Dependency.getDependenciesName(this.getDependencies())); if (contextSequence != null) - {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);} + {context.getProfiler().message(this, Profiler.START_SEQUENCES, + "CONTEXT SEQUENCE", contextSequence);} if (contextItem != null) - {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());} - } - + {context.getProfiler().message(this, Profiler.START_SEQUENCES, + "CONTEXT ITEM", contextItem.toSequence());} + } + Sequence result; - final Sequence arg = getArgument(0).eval(contextSequence, contextItem); - if(arg.isEmpty()) - {result = Sequence.EMPTY_SEQUENCE;} - else { - boolean computableProcessing = false; - //TODO : test if a range index is defined *iff* it is compatible with the collator - final Collator collator = getCollator(contextSequence, contextItem, 2); - final SequenceIterator iter = arg.unorderedIterator(); - AtomicValue max = null; - while (iter.hasNext()) { - final Item item = iter.nextItem(); - - if (item instanceof QNameValue) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(item.getType()), arg);} - - AtomicValue value = item.atomize(); - - //Duration values must either all be xs:yearMonthDuration values or must all be xs:dayTimeDuration values. - if (Type.subTypeOf(value.getType(), Type.DURATION)) { - value = ((DurationValue)value).wrap(); - if (value.getType() == Type.YEAR_MONTH_DURATION) { - if (max != null && max.getType() != Type.YEAR_MONTH_DURATION) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(max.getType()) + - " and " + Type.getTypeName(value.getType()), value);} - - } else if (value.getType() == Type.DAY_TIME_DURATION) { - if (max != null && max.getType() != Type.DAY_TIME_DURATION) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(max.getType()) + - " and " + Type.getTypeName(value.getType()), value);} - - } else - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(value.getType()), value);} - - //Any value of type xdt:untypedAtomic is cast to xs:double - } else if (value.getType() == Type.UNTYPED_ATOMIC) - {value = value.convertTo(Type.DOUBLE);} - - if (max == null) - {max = value;} - - else { - if (Type.getCommonSuperType(max.getType(), value.getType()) == Type.ANY_ATOMIC_TYPE) { - throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(max.getType()) + - " and " + Type.getTypeName(value.getType()), max); - } - //Any value of type xdt:untypedAtomic is cast to xs:double - if (value.getType() == Type.UNTYPED_ATOMIC) - {value = value.convertTo(Type.DOUBLE);} - - //Numeric tests - if (Type.subTypeOfUnion(value.getType(), Type.NUMERIC)) { - //Don't mix comparisons - if (!Type.subTypeOfUnion(max.getType(), Type.NUMERIC)) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(max.getType()) + - " and " + Type.getTypeName(value.getType()), max);} - if (((NumericValue) value).isNaN()) { - //Type NaN correctly - value = value.promote(max); - if (value.getType() == Type.FLOAT) - {max = FloatValue.NaN;} - else - {max = DoubleValue.NaN;} - //although result will be NaN, we need to continue on order to type correctly - continue; - } else - {max = max.promote(value);} - } - //Ugly test - if (max instanceof ComputableValue && value instanceof ComputableValue) { - //Type value correctly - value = value.promote(max); - max = (ComputableValue) max.max(collator, value); - computableProcessing = true; - } else { - if (computableProcessing) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(max.getType()) + - " and " + Type.getTypeName(value.getType()), max);} - if (Collations.compare(collator, value.getStringValue(), max.getStringValue()) > 0) - {max = value;} - } + final Sequence arg = getArgument(0).eval(contextSequence, contextItem); + if (arg.isEmpty()) { + result = Sequence.EMPTY_SEQUENCE; + } else { + final Collator collator = getOptionalCollator(contextSequence, contextItem); + result = findMax(arg, collator); + } + + if (context.getProfiler().isEnabled()) + {context.getProfiler().end(this, "", result);} + + return result; + } + + /** + * Get collator, handling empty sequence for XQ4 optional collation parameter. + */ + private Collator getOptionalCollator(Sequence contextSequence, Item contextItem) + throws XPathException { + if (getArgumentCount() == 2) { + final Sequence collationSeq = getArgument(1).eval(contextSequence, contextItem); + if (!collationSeq.isEmpty()) { + final String collationURI = collationSeq.getStringValue(); + return context.getCollator(collationURI, ErrorCodes.FOCH0002); + } + } + return context.getDefaultCollator(); + } + + private Sequence findMax(Sequence arg, Collator collator) throws XPathException { + final SequenceIterator iter = arg.unorderedIterator(); + AtomicValue max = null; + boolean hasNaN = false; + AtomicValue nanValue = null; + + while (iter.hasNext()) { + final Item item = iter.nextItem(); + AtomicValue value = item.atomize(); + + // Cast untypedAtomic to double + if (value.getType() == Type.UNTYPED_ATOMIC) { + value = value.convertTo(Type.DOUBLE); + } + + // Wrap duration subtypes + if (Type.subTypeOf(value.getType(), Type.DURATION)) { + value = ((DurationValue) value).wrap(); + } + + // Track NaN: if any value is NaN, result is NaN + if (value instanceof NumericValue && ((NumericValue) value).isNaN()) { + if (!hasNaN) { + hasNaN = true; + nanValue = value; + } + continue; + } + + if (max == null) { + max = value; + } else { + try { + final int cmp = FunCompare.compare(value, max, collator); + if (cmp > 0) { + max = value; + } + } catch (final XPathException e) { + throw new XPathException(this, ErrorCodes.FORG0006, + "Cannot compare " + Type.getTypeName(max.getType()) + + " and " + Type.getTypeName(value.getType()), value); } - } - result = max; + } } - if (context.getProfiler().isEnabled()) - {context.getProfiler().end(this, "", result);} - - return result; - - } + if (hasNaN) { + return nanValue; + } + return max; + } } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMin.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMin.java index c98ce39133a..10d58c59c4d 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMin.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMin.java @@ -23,8 +23,8 @@ import com.ibm.icu.text.Collator; import org.exist.dom.QName; -import org.exist.util.Collations; import org.exist.xquery.Cardinality; +import org.exist.xquery.Constants; import org.exist.xquery.Dependency; import org.exist.xquery.ErrorCodes; import org.exist.xquery.Function; @@ -33,193 +33,145 @@ import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.AtomicValue; -import org.exist.xquery.value.ComputableValue; -import org.exist.xquery.value.DoubleValue; import org.exist.xquery.value.DurationValue; -import org.exist.xquery.value.FloatValue; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.Item; import org.exist.xquery.value.NumericValue; -import org.exist.xquery.value.QNameValue; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceIterator; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; /** + * Implementation of fn:min with XQuery 4.0 semantics. + * Uses fn:compare-based mutual comparability (XQ4 numeric total order, + * duration total order, date/time total order). + * * @author Wolfgang Meier */ public class FunMin extends CollatingFunction { - protected static final String FUNCTION_DESCRIPTION_COMMON_1 = - - "Selects an item from the input sequence $arg whose value is " + - "less than or equal to the value of every other item in the " + - "input sequence. If there are two or more such items, then " + - "the specific item whose value is returned is implementation dependent.\n\n" + - "The following rules are applied to the input sequence:\n\n" + - "- Values of type xs:untypedAtomic in $arg are cast to xs:double.\n" + - "- Numeric and xs:anyURI values are converted to the least common " + - "type that supports the 'le' operator by a combination of type promotion " + - "and subtype substitution. See Section B.1 Type PromotionXP and " + - "Section B.2 Operator MappingXP.\n\n" + - - "The items in the resulting sequence may be reordered in an arbitrary " + - "order. The resulting sequence is referred to below as the converted " + - "sequence. This function returns an item from the converted sequence " + - "rather than the input sequence.\n\n" + - - "If the converted sequence is empty, the empty sequence is returned.\n\n" + - - "All items in $arg must be numeric or derived from a single base type " + - "for which the 'le' operator is defined. In addition, the values in the " + - "sequence must have a total order. If date/time values do not have a " + - "timezone, they are considered to have the implicit timezone provided " + - "by the dynamic context for the purpose of comparison. Duration values " + - "must either all be xs:yearMonthDuration values or must all be " + - "xs:dayTimeDuration values.\n\n" + - - "If any of these conditions is not met, a type error is raised [err:FORG0006].\n\n" + - - "If the converted sequence contains the value NaN, the value NaN is returned.\n\n" + - - "If the items in the value of $arg are of type xs:string or types derived " + - "by restriction from xs:string, then the determination of the item with " + - "the smallest value is made according to the collation that is used. "; - protected static final String FUNCTION_DESCRIPTION_2_PARAM = - "If the type of the items in $arg is not xs:string and $collation is " + - "specified, the collation is ignored.\n\n"; - protected static final String FUNCTION_DESCRIPTION_COMMON_2 = - "The collation used by the invocation of this function is determined " + - "according to the rules in 7.3.1 Collations."; - - public final static FunctionSignature[] signatures = { - new FunctionSignature( - new QName("min", Function.BUILTIN_FUNCTION_NS), - FUNCTION_DESCRIPTION_COMMON_1 + - FUNCTION_DESCRIPTION_COMMON_2, - new SequenceType[] { new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input sequence")}, - new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "the minimum value") - ), - new FunctionSignature( - new QName("min", Function.BUILTIN_FUNCTION_NS), - FUNCTION_DESCRIPTION_COMMON_1 + FUNCTION_DESCRIPTION_2_PARAM + - FUNCTION_DESCRIPTION_COMMON_2, - new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input sequence"), - new FunctionParameterSequenceType("collation-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI") - }, - new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "the minimum value") - ) - }; - - public FunMin(XQueryContext context, FunctionSignature signature) { - super(context, signature); - } - - /* (non-Javadoc) - * @see org.exist.xquery.Expression#eval(org.exist.dom.persistent.DocumentSet, org.exist.xquery.value.Sequence, org.exist.xquery.value.Item) - */ - public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { + public final static FunctionSignature[] signatures = { + new FunctionSignature( + new QName("min", Function.BUILTIN_FUNCTION_NS), + "Returns the minimum value from the input sequence, using XQ4 comparison semantics.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, + "the minimum value") + ), + new FunctionSignature( + new QName("min", Function.BUILTIN_FUNCTION_NS), + "Returns the minimum value from the input sequence, using the specified collation.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, + Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, + Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, + "the minimum value") + ) + }; + + public FunMin(XQueryContext context, FunctionSignature signature) { + super(context, signature); + } + + public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { if (context.getProfiler().isEnabled()) { - context.getProfiler().start(this); - context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies())); + context.getProfiler().start(this); + context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", + Dependency.getDependenciesName(this.getDependencies())); if (contextSequence != null) - {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);} + {context.getProfiler().message(this, Profiler.START_SEQUENCES, + "CONTEXT SEQUENCE", contextSequence);} if (contextItem != null) - {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());} + {context.getProfiler().message(this, Profiler.START_SEQUENCES, + "CONTEXT ITEM", contextItem.toSequence());} } - - boolean computableProcessing = false; + Sequence result; - final Sequence arg = getArgument(0).eval(contextSequence, contextItem); - if (arg.isEmpty()) - {result = Sequence.EMPTY_SEQUENCE;} - else { - //TODO : test if a range index is defined *iff* it is compatible with the collator - final Collator collator = getCollator(contextSequence, contextItem, 2); - final SequenceIterator iter = arg.unorderedIterator(); - AtomicValue min = null; - while (iter.hasNext()) { - final Item item = iter.nextItem(); - if (item instanceof QNameValue) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(item.getType()), arg);} - AtomicValue value = item.atomize(); - - //Duration values must either all be xs:yearMonthDuration values or must all be xs:dayTimeDuration values. - if (Type.subTypeOf(value.getType(), Type.DURATION)) { - value = ((DurationValue)value).wrap(); - if (value.getType() == Type.YEAR_MONTH_DURATION) { - if (min != null && min.getType() != Type.YEAR_MONTH_DURATION) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(min.getType()) + - " and " + Type.getTypeName(value.getType()), value);} - - } else if (value.getType() == Type.DAY_TIME_DURATION) { - if (min != null && min.getType() != Type.DAY_TIME_DURATION) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(min.getType()) + - " and " + Type.getTypeName(value.getType()), value);} - - } else - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(value.getType()), value);} - - //Any value of type xdt:untypedAtomic is cast to xs:double - } else if (value.getType() == Type.UNTYPED_ATOMIC) - {value = value.convertTo(Type.DOUBLE);} - - if (min == null) - {min = value;} - else { - if (Type.getCommonSuperType(min.getType(), value.getType()) == Type.ANY_ATOMIC_TYPE) { - throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(min.getType()) + - " and " + Type.getTypeName(value.getType()), value); - } - //Any value of type xdt:untypedAtomic is cast to xs:double - if (value.getType() == Type.ANY_ATOMIC_TYPE) - {value = value.convertTo(Type.DOUBLE);} - //Numeric tests - if (Type.subTypeOfUnion(value.getType(), Type.NUMERIC)) { - //Don't mix comparisons - if (!Type.subTypeOfUnion(min.getType(), Type.NUMERIC)) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(min.getType()) + - " and " + Type.getTypeName(value.getType()), min);} - if (((NumericValue) value).isNaN()) { - //Type NaN correctly - value = value.promote(min); - if (value.getType() == Type.FLOAT) - {min = FloatValue.NaN;} - else - {min = DoubleValue.NaN;} - //although result will be NaN, we need to continue on order to type correctly - continue; - } - min = min.promote(value); - } - //Ugly test - if (value instanceof ComputableValue) { - if (!(min instanceof ComputableValue)) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(min.getType()) + - " and " + Type.getTypeName(value.getType()), min);} - //Type value correctly - value = value.promote(min); - min = min.min(collator, value); - computableProcessing = true; - } else { - if (computableProcessing) - {throw new XPathException(this, ErrorCodes.FORG0006, "Cannot compare " + Type.getTypeName(min.getType()) + - " and " + Type.getTypeName(value.getType()), value);} - if (Collations.compare(collator, value.getStringValue(), min.getStringValue()) < 0) - {min = value;} - } - } - } - result = min; + final Sequence arg = getArgument(0).eval(contextSequence, contextItem); + if (arg.isEmpty()) { + result = Sequence.EMPTY_SEQUENCE; + } else { + final Collator collator = getOptionalCollator(contextSequence, contextItem); + result = findMin(arg, collator); + } + + if (context.getProfiler().isEnabled()) + {context.getProfiler().end(this, "", result);} + + return result; + } + + /** + * Get collator, handling empty sequence for XQ4 optional collation parameter. + */ + private Collator getOptionalCollator(Sequence contextSequence, Item contextItem) + throws XPathException { + if (getArgumentCount() == 2) { + final Sequence collationSeq = getArgument(1).eval(contextSequence, contextItem); + if (!collationSeq.isEmpty()) { + final String collationURI = collationSeq.getStringValue(); + return context.getCollator(collationURI, ErrorCodes.FOCH0002); + } } - - if (context.getProfiler().isEnabled()) - {context.getProfiler().end(this, "", result);} - - return result; + return context.getDefaultCollator(); } + private Sequence findMin(Sequence arg, Collator collator) throws XPathException { + final SequenceIterator iter = arg.unorderedIterator(); + AtomicValue min = null; + boolean hasNaN = false; + AtomicValue nanValue = null; + + while (iter.hasNext()) { + final Item item = iter.nextItem(); + AtomicValue value = item.atomize(); + + // Cast untypedAtomic to double + if (value.getType() == Type.UNTYPED_ATOMIC) { + value = value.convertTo(Type.DOUBLE); + } + + // Wrap duration subtypes + if (Type.subTypeOf(value.getType(), Type.DURATION)) { + value = ((DurationValue) value).wrap(); + } + + // Track NaN: if any value is NaN, result is NaN + if (value instanceof NumericValue && ((NumericValue) value).isNaN()) { + if (!hasNaN) { + hasNaN = true; + nanValue = value; + } + continue; + } + + if (min == null) { + min = value; + } else { + try { + final int cmp = FunCompare.compare(value, min, collator); + if (cmp < 0) { + min = value; + } + } catch (final XPathException e) { + throw new XPathException(this, ErrorCodes.FORG0006, + "Cannot compare " + Type.getTypeName(min.getType()) + + " and " + Type.getTypeName(value.getType()), value); + } + } + } + + if (hasNaN) { + return nanValue; + } + return min; + } } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunName.java index 922cac959b4..23388ceaad2 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunName.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunName.java @@ -80,7 +80,7 @@ public class FunName extends Function { new QName("name", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION_1_PARAM + FUNCTION_DESCRIPTION_COMMON, new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.NODE, Cardinality.ZERO_OR_ONE, "The input node") + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.ZERO_OR_ONE, "The input node") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the name") ) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURI.java index b89ea61f02f..2422cb60f93 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURI.java @@ -77,7 +77,7 @@ public class FunNamespaceURI extends Function { new QName("namespace-uri", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION_1_PARAM + FUNCTION_DESCRIPTION_COMMON, new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.NODE, Cardinality.ZERO_OR_ONE, "The input node") + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.ZERO_OR_ONE, "The input node") }, new FunctionReturnSequenceType(Type.ANY_URI, Cardinality.EXACTLY_ONE, "the namespace URI") ) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURIForPrefix.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURIForPrefix.java index 2e01a811b09..42d84515917 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURIForPrefix.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNamespaceURIForPrefix.java @@ -61,7 +61,7 @@ public class FunNamespaceURIForPrefix extends BasicFunction { new QName("namespace-uri-for-prefix", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION, new SequenceType[] { - new FunctionParameterSequenceType("prefix", Type.STRING, Cardinality.ZERO_OR_ONE, "The namespace prefix"), + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The namespace prefix"), new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.EXACTLY_ONE, "The element") }, new FunctionReturnSequenceType(Type.ANY_URI, Cardinality.ZERO_OR_ONE, "the namespace URI")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNodeName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNodeName.java index 0059f144872..59e1828de07 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNodeName.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNodeName.java @@ -64,7 +64,7 @@ public class FunNodeName extends Function { "of nodes it returns the empty sequence. If $arg is the empty sequence, the " + "empty sequence is returned.", new SequenceType[]{ - new FunctionParameterSequenceType("arg", Type.NODE, Cardinality.ZERO_OR_ONE, "The input node") + new FunctionParameterSequenceType("node", Type.NODE, Cardinality.ZERO_OR_ONE, "The input node") }, new FunctionReturnSequenceType(Type.QNAME, Cardinality.ZERO_OR_ONE, "the expanded QName")) }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeSpace.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeSpace.java index 8f4fb5e7808..54c201ba61a 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeSpace.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeSpace.java @@ -78,7 +78,7 @@ public class FunNormalizeSpace extends Function { new QName("normalize-space", Function.BUILTIN_FUNCTION_NS), FUNCTION_DESCRIPTION_1_PARAM + FUNCTION_DESCRIPTION_COMMON_1 + FUNCTION_DESCRIPTION_1_PARAM_1 + FUNCTION_DESCRIPTION_COMMON_2, - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to normalize")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to normalize")}, RETURN_TYPE ) }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeUnicode.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeUnicode.java index 5a6038037d2..ceb850699d8 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeUnicode.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNormalizeUnicode.java @@ -84,8 +84,8 @@ public class FunNormalizeUnicode extends Function { "If the effective value of the $normalization-form is other than one of the values " + "supported by the implementation, then an error is raised [err:FOCH0003]."; - protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The unicode string to normalize"); - protected static final FunctionParameterSequenceType NF_PARAM = new FunctionParameterSequenceType("normalization-form", Type.STRING, Cardinality.EXACTLY_ONE, "The normalization form"); + protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The unicode string to normalize"); + protected static final FunctionParameterSequenceType NF_PARAM = new FunctionParameterSequenceType("form", Type.STRING, Cardinality.EXACTLY_ONE, "The normalization form"); protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the normalized text"); public final static FunctionSignature[] signatures = { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNot.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNot.java index 64c1389563f..5d7a33f5783 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNot.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNot.java @@ -51,7 +51,7 @@ public class FunNot extends Function { "value is false, and false if the effective boolean value is true. \n\n $arg is reduced to an effective boolean value by applying " + "the fn:boolean() function.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input items")}, + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input items")}, new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "the negated effective boolean value (ebv) of $arg")); @SuppressWarnings("unused") diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNumber.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNumber.java index 3ceb116edd4..28276f7f770 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNumber.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunNumber.java @@ -67,7 +67,7 @@ public class FunNumber extends Function { "following the rules of 17.1.3.2 Casting to xs:double. If the conversion " + "to xs:double fails, the xs:double value NaN is returned."; - protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "The input item"); + protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("value", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "The input item"); protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the numerical value"); public final static FunctionSignature[] signatures = { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOnFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOnFunctions.java index 6fd3e12e78f..9f5d78fc7be 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOnFunctions.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOnFunctions.java @@ -24,6 +24,8 @@ import org.exist.dom.QName; import org.exist.xquery.*; import org.exist.xquery.Module; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.AtomicValue; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReference; import org.exist.xquery.value.FunctionReturnSequenceType; @@ -31,7 +33,9 @@ import org.exist.xquery.value.QNameValue; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; public class FunOnFunctions extends BasicFunction { @@ -59,8 +63,25 @@ public class FunOnFunctions extends BasicFunction { new SequenceType[] { new FunctionParameterSequenceType("function", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The function item") }, - new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, - "The arity of the function.")) + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, + "The arity of the function.")), + new FunctionSignature( + new QName("function-annotations", Function.BUILTIN_FUNCTION_NS), + "Returns the annotations of the function identified by a function item, " + + "as a sequence of single-entry maps.", + new SequenceType[] { + new FunctionParameterSequenceType("function", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The function item") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, + "A sequence of single-entry maps, each associating an annotation name (xs:QName) with its value(s).")), + new FunctionSignature( + new QName("function-identity", Function.BUILTIN_FUNCTION_NS), + "Returns a string that uniquely identifies the function item within the current execution scope.", + new SequenceType[] { + new FunctionParameterSequenceType("function", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The function item") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, + "A string uniquely identifying the function.")) }; public FunOnFunctions(XQueryContext context, FunctionSignature signature) { @@ -102,10 +123,14 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) {return Sequence.EMPTY_SEQUENCE;} else {return new QNameValue(this, context, qname);} - } else { - // isCalledAs("function-arity") + } else if (isCalledAs("function-arity")) { final FunctionReference ref = (FunctionReference) args[0].itemAt(0); return new IntegerValue(this, ref.getSignature().getArgumentCount()); + } else if (isCalledAs("function-annotations")) { + return evalFunctionAnnotations(args); + } else { + // isCalledAs("function-identity") + return evalFunctionIdentity(args); } } catch (final Exception e) { if (e instanceof XPathException) @@ -115,6 +140,71 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) } } + private Sequence evalFunctionAnnotations(final Sequence[] args) throws XPathException { + final FunctionReference ref = (FunctionReference) args[0].itemAt(0); + + // Try to find annotations from various paths through the function reference + Annotation[] annotations = ref.getSignature().getAnnotations(); + + // Fallback: check the underlying UserDefinedFunction's signature + if ((annotations == null || annotations.length == 0) && ref.getCall() != null) { + final Function func = ref.getCall().getFunction(); + if (func != null) { + annotations = func.getSignature().getAnnotations(); + } + } + + if (annotations == null || annotations.length == 0) { + return Sequence.EMPTY_SEQUENCE; + } + + return buildAnnotationMaps(annotations); + } + + private Sequence buildAnnotationMaps(final Annotation[] annotations) throws XPathException { + final ValueSequence result = new ValueSequence(annotations.length); + for (final Annotation annot : annotations) { + final QName annotName = annot.getName(); + final QNameValue qnameKey = new QNameValue(this, context, annotName); + + // Build the value sequence from annotation literal values + final LiteralValue[] literals = annot.getValue(); + final Sequence annotValues; + if (literals == null || literals.length == 0) { + annotValues = Sequence.EMPTY_SEQUENCE; + } else { + final ValueSequence vals = new ValueSequence(literals.length); + for (final LiteralValue lv : literals) { + vals.add((AtomicValue) lv.getValue()); + } + annotValues = vals; + } + + // Create a single-entry map: { annotName: values } + final MapType map = new MapType(this, context); + map.add(qnameKey, annotValues); + result.add(map); + } + return result; + } + + private Sequence evalFunctionIdentity(final Sequence[] args) throws XPathException { + final FunctionReference ref = (FunctionReference) args[0].itemAt(0); + final FunctionSignature sig = ref.getSignature(); + final QName qname = sig.getName(); + + final String identity; + if (qname != null && qname != InlineFunction.INLINE_FUNCTION_QNAME) { + // Named function: use namespace + local name + arity for stable identity + identity = "Q{" + (qname.getNamespaceURI() != null ? qname.getNamespaceURI() : "") + + "}" + qname.getLocalPart() + "#" + sig.getArgumentCount(); + } else { + // Anonymous function / inline / partial application: use object identity + identity = "anon#" + System.identityHashCode(ref.getCall()); + } + return new StringValue(this, identity); + } + public static FunctionCall lookupFunction(final Expression parent, final QName qname, final int arity) { // check if the function is from a module final Module[] modules = parent.getContext().getModules(qname.getNamespaceURI()); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOneOrMore.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOneOrMore.java index 7acb1986026..d483151faab 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOneOrMore.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunOneOrMore.java @@ -50,7 +50,7 @@ public class FunOneOrMore extends Function { "Returns $arg if it contains one or more items. Otherwise, " + "raises an error.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ONE_OR_MORE, "the sequence passed in by $arg if it contains one or more items.")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRemove.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRemove.java index 5dd15172254..c75d391cbff 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRemove.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRemove.java @@ -58,8 +58,8 @@ public class FunRemove extends Function { "is greater than $position. If $target is the empty sequence, the empty sequence " + "is returned.", new SequenceType[] { - new FunctionParameterSequenceType("target", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), - new FunctionParameterSequenceType("position", Type.INTEGER, Cardinality.EXACTLY_ONE, "The position of the value to be removed") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("positions", Type.INTEGER, Cardinality.EXACTLY_ONE, "The position of the value to be removed") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the new sequence with the item at the position specified by the value of $position removed.")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java index 6dea523469a..c460dc7937f 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java @@ -23,6 +23,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import net.sf.saxon.Configuration; import net.sf.saxon.functions.Replace; @@ -30,9 +32,12 @@ import org.exist.dom.QName; import org.exist.xquery.*; import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.StringValue; import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; import static org.exist.xquery.FunctionDSL.*; import static org.exist.xquery.regex.RegexUtil.*; @@ -72,7 +77,9 @@ public class FunReplace extends BasicFunction { private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_INPUT = optParam("input", Type.STRING, "The input string"); private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern to match"); - private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_REPLACEMENT = param("replacement", Type.STRING, "The string to replace the pattern with"); + private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_REPLACEMENT = + new FunctionParameterSequenceType("replacement", Type.ITEM, Cardinality.ZERO_OR_ONE, + "The replacement string, function, or empty sequence"); static final FunctionSignature [] FS_REPLACE = functionSignatures( FS_REPLACE_NAME, @@ -88,7 +95,7 @@ public class FunReplace extends BasicFunction { FS_TOKENIZE_PARAM_INPUT, FS_TOKENIZE_PARAM_PATTERN, FS_TOKENIZE_PARAM_REPLACEMENT, - param("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags") + param("flags", Type.STRING, Cardinality.ZERO_OR_ONE, "The flags") ) ) ); @@ -104,36 +111,77 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro if (stringArg.isEmpty()) { result = StringValue.EMPTY_STRING; } else { - final String flags; - if (args.length == 4) { + String flags; + if (args.length == 4 && !args[3].isEmpty()) { flags = args[3].itemAt(0).getStringValue(); } else { flags = ""; } + + // XQ4: '!' flag — XPath mode (allows empty matches, etc.) + // Strip before passing to regex engine which doesn't recognize it + final boolean hasXPathFlag = flags.contains("!"); + if (hasXPathFlag) { + flags = flags.replace("!", ""); + } + + // XQ4: 'c' flag — strip regex comments (#...#) before compilation + // When 'q' (literal) flag is present, 'c' is ignored + final boolean hasCommentFlag = flags.indexOf('c') >= 0 && flags.indexOf('q') < 0; + if (flags.indexOf('c') >= 0) { + flags = flags.replace("c", ""); + } + final String string = stringArg.getStringValue(); - final String pattern = args[1].itemAt(0).getStringValue(); - final String replace = args[2].itemAt(0).getStringValue(); + String pattern = args[1].itemAt(0).getStringValue(); - final Configuration config = context.getBroker().getBrokerPool().getSaxonConfiguration(); + if (hasCommentFlag) { + pattern = stripRegexComments(pattern); + } + // XQ4: 3rd arg can be empty sequence (treated as empty string) or a function + final Sequence replacementArg = args[2]; + final boolean isFunctionReplacement = !replacementArg.isEmpty() + && Type.subTypeOf(replacementArg.itemAt(0).getType(), Type.FUNCTION); + final String replace; + if (isFunctionReplacement) { + replace = null; // handled below + } else if (replacementArg.isEmpty()) { + replace = ""; + } else { + replace = replacementArg.itemAt(0).getStringValue(); + } + + final Configuration config = context.getBroker().getBrokerPool().getSaxonConfiguration(); final List warnings = new ArrayList<>(1); try { final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings); - if (regularExpression.matches("")) { - throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); - } + final boolean canMatchEmpty = regularExpression.matches(""); + final boolean allowEmptyMatch = hasXPathFlag || context.getXQueryVersion() >= 40; - //TODO(AR) cache the regular expression... might be possible through Saxon config + if (canMatchEmpty && !allowEmptyMatch) { + throw new XPathException(this, ErrorCodes.FORX0003, + "Regular expression matches zero-length string"); + } - if (!hasLiteral(flags)) { - final String msg = Replace.checkReplacement(replace); - if (msg != null) { - throw new XPathException(this, ErrorCodes.FORX0004, msg); + if (isFunctionReplacement) { + result = evalFunctionReplacement(string, pattern, flags, + (FunctionReference) replacementArg.itemAt(0)); + } else if (canMatchEmpty) { + // XQ4: empty-matching regex allowed — use Java regex fallback + // since Saxon's replace() doesn't handle empty matches well + result = evalEmptyMatchReplace(string, pattern, replace, flags); + } else { + if (!hasLiteral(flags)) { + final String msg = Replace.checkReplacement(replace); + if (msg != null) { + throw new XPathException(this, ErrorCodes.FORX0004, msg); + } } + final CharSequence res = regularExpression.replace(string, replace); + result = new StringValue(this, res.toString()); } - final CharSequence res = regularExpression.replace(string, replace); - result = new StringValue(this, res.toString()); } catch (final net.sf.saxon.trans.XPathException e) { switch (e.getErrorCodeLocalPart()) { @@ -145,7 +193,183 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro } } } - + return result; } + + /** + * XQ4: Handle replacement when the regex can match the empty string. + * Uses Java regex with XPath-to-Java translation for proper empty-match handling. + */ + private Sequence evalEmptyMatchReplace(final String input, final String pattern, + final String replace, final String flags) throws XPathException { + final String javaPattern = org.exist.xquery.regex.RegexUtil.translateRegexp( + this, pattern, hasIgnoreWhitespace(flags), hasCaseInsensitive(flags)); + final int javaFlags = parseFlags(this, flags); + final Pattern compiled = Pattern.compile(javaPattern, javaFlags); + final Matcher matcher = compiled.matcher(input); + + final StringBuilder sb = new StringBuilder(); + int lastEnd = 0; + while (matcher.find()) { + sb.append(input, lastEnd, matcher.start()); + + // Apply XPath-style replacement ($0, $1, etc.) + sb.append(applyXPathReplacement(replace, matcher)); + + lastEnd = matcher.end(); + + // Advance past empty match to prevent infinite loop + if (matcher.start() == matcher.end()) { + if (lastEnd < input.length()) { + sb.append(input.charAt(lastEnd)); + lastEnd++; + matcher.region(lastEnd, input.length()); + } else { + break; + } + } + } + sb.append(input, lastEnd, input.length()); + return new StringValue(this, sb.toString()); + } + + /** + * Apply XPath-style replacement string ($0, $1, etc.) using a Java Matcher. + */ + private static String applyXPathReplacement(final String replacement, final Matcher matcher) { + final StringBuilder result = new StringBuilder(); + for (int i = 0; i < replacement.length(); i++) { + final char ch = replacement.charAt(i); + if (ch == '$' && i + 1 < replacement.length()) { + i++; + int groupNum = 0; + boolean hasDigit = false; + while (i < replacement.length() && Character.isDigit(replacement.charAt(i))) { + groupNum = groupNum * 10 + (replacement.charAt(i) - '0'); + hasDigit = true; + i++; + } + i--; // back up one + if (hasDigit && groupNum <= matcher.groupCount()) { + final String g = matcher.group(groupNum); + if (g != null) { + result.append(g); + } + } else if (hasDigit) { + // Group doesn't exist, output empty + } + } else if (ch == '\\' && i + 1 < replacement.length()) { + i++; + result.append(replacement.charAt(i)); + } else { + result.append(ch); + } + } + return result.toString(); + } + + /** + * XQ4: Evaluate fn:replace with a function replacement parameter. + * The function receives (match, groups*) and returns the replacement string. + */ + private Sequence evalFunctionReplacement(final String input, final String pattern, + final String flags, final FunctionReference func) throws XPathException { + // Use Java regex for function replacement since Saxon's replace() only accepts strings + final String javaPattern = org.exist.xquery.regex.RegexUtil.translateRegexp( + this, pattern, hasIgnoreWhitespace(flags), hasCaseInsensitive(flags)); + int javaFlags = parseFlags(this, flags); + final Pattern compiled = Pattern.compile(javaPattern, javaFlags); + final Matcher matcher = compiled.matcher(input); + + final StringBuilder sb = new StringBuilder(); + int lastEnd = 0; + while (matcher.find()) { + sb.append(input, lastEnd, matcher.start()); + + // Build arguments: (match, group1, group2, ...) + final int groupCount = matcher.groupCount(); + final Sequence[] funcArgs = new Sequence[2]; + funcArgs[0] = new StringValue(this, matcher.group()); + final ValueSequence groups = new ValueSequence(groupCount); + for (int i = 1; i <= groupCount; i++) { + final String g = matcher.group(i); + groups.add(g != null ? new StringValue(this, g) : StringValue.EMPTY_STRING); + } + funcArgs[1] = groups; + + final Sequence replacement = func.evalFunction(null, null, funcArgs); + if (!replacement.isEmpty()) { + sb.append(replacement.getStringValue()); + } + + lastEnd = matcher.end(); + + // Prevent infinite loop on empty match + if (matcher.start() == matcher.end()) { + if (lastEnd < input.length()) { + sb.append(input.charAt(lastEnd)); + lastEnd++; + // Reset matcher position + matcher.region(lastEnd, input.length()); + } else { + break; + } + } + } + sb.append(input, lastEnd, input.length()); + return new StringValue(this, sb.toString()); + } + + /** + * XQ4: Strip regex comments (c flag). + * Removes text between # markers: #comment# becomes empty. + * A # at end of pattern (no closing #) is treated as end-of-line comment. + * Escaped \# is preserved. + */ + static String stripRegexComments(final String pattern) { + final StringBuilder result = new StringBuilder(pattern.length()); + boolean inComment = false; + boolean inCharClass = false; + for (int i = 0; i < pattern.length(); i++) { + final char ch = pattern.charAt(i); + if (ch == '\\' && i + 1 < pattern.length()) { + final char next = pattern.charAt(i + 1); + if (!inComment) { + if (next == '#') { + // \# in c-flag mode is a literal # — output just # + result.append('#'); + } else { + result.append(ch); + result.append(next); + } + } + i++; // skip escaped character + } else if (inCharClass) { + // Inside [...] character class, # is literal + if (ch == ']') { + inCharClass = false; + } + if (!inComment) { + result.append(ch); + } + } else if (ch == '[' && !inComment) { + inCharClass = true; + result.append(ch); + } else if (ch == '#' && !inCharClass) { + inComment = !inComment; + } else if (!inComment) { + result.append(ch); + } + } + return result.toString(); + } + + private static boolean hasCaseInsensitive(final String flags) { + return flags.contains("i"); + } + + private static boolean hasIgnoreWhitespace(final String flags) { + return flags.contains("x"); + } } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveQName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveQName.java index 6eb63ed7ff5..e8bbc5b3a03 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveQName.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveQName.java @@ -68,7 +68,7 @@ public class FunResolveQName extends BasicFunction { "resulting expanded-QName has no namespace part.\n\nThe prefix (or absence of a prefix) in the " + "supplied $qname argument is retained in the returned expanded-QName.", new SequenceType[] { - new FunctionParameterSequenceType("qname", Type.STRING, Cardinality.ZERO_OR_ONE, "The QName name"), + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The QName name"), new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.EXACTLY_ONE, "The element") }, new FunctionReturnSequenceType(Type.QNAME, Cardinality.ZERO_OR_ONE, "the QName of $element with lexical form $qname") diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveURI.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveURI.java index 150963daad2..77004a3ae22 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveURI.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunResolveURI.java @@ -68,7 +68,7 @@ public class FunResolveURI extends Function { "is raised [err:FORG0002].\n\n" + "If $relative is the empty sequence, the empty sequence is returned."; - protected static final FunctionParameterSequenceType RELATIVE_ARG = new FunctionParameterSequenceType("relative", Type.STRING, Cardinality.ZERO_OR_ONE, "The relative URI"); + protected static final FunctionParameterSequenceType RELATIVE_ARG = new FunctionParameterSequenceType("href", Type.STRING, Cardinality.ZERO_OR_ONE, "The relative URI"); protected static final FunctionParameterSequenceType BASE_ARG = new FunctionParameterSequenceType("base", Type.STRING, Cardinality.EXACTLY_ONE, "The base URI"); protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.ANY_URI, Cardinality.ZERO_OR_ONE, "the absolute URI"); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReverse.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReverse.java index 0774d01ccdb..936451f96b1 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReverse.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReverse.java @@ -44,7 +44,7 @@ public class FunReverse extends Function { new QName("reverse", Function.BUILTIN_FUNCTION_NS), "Reverses the order of items in a sequence. If the argument is an empty" + "sequence, the empty sequence is returned.", - new SequenceType[] {new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to reverse")}, + new SequenceType[] {new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to reverse")}, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the reverse order sequence")); public FunReverse(XQueryContext context) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRound.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRound.java index 4ad0bb8cf7b..2b4286f5c15 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRound.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRound.java @@ -68,7 +68,11 @@ public class FunRound extends FunRoundBase { optParam("arg", Type.NUMERIC, "The input number")), functionSignature(FN_NAME, FunRound.description, FunRound.returnType, optParam("arg", Type.NUMERIC, "The input number"), - optParam("precision", Type.INTEGER, "The input number")) + optParam("precision", Type.INTEGER, "The precision")), + functionSignature(FN_NAME, FunRound.description, FunRound.returnType, + optParam("arg", Type.NUMERIC, "The input number"), + optParam("precision", Type.INTEGER, "The precision"), + optParam("mode", Type.STRING, "The rounding mode")) }; public FunRound(final XQueryContext context, final FunctionSignature signature) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundBase.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundBase.java index ba7b1050a3b..85080cd0e17 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundBase.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundBase.java @@ -25,6 +25,7 @@ import org.exist.xquery.value.*; import java.math.RoundingMode; +import java.util.Map; import java.util.Objects; /** @@ -45,6 +46,18 @@ public int returnsType() { abstract protected RoundingMode getFunctionRoundingMode(NumericValue value); + private static final Map ROUNDING_MODE_MAP = Map.of( + "floor", "FLOOR", + "ceiling", "CEILING", + "toward-zero", "DOWN", + "away-from-zero", "UP", + "half-to-floor", "HALF_FLOOR", + "half-to-ceiling", "HALF_CEILING", + "half-toward-zero", "HALF_DOWN", + "half-away-from-zero", "HALF_UP", + "half-to-even", "HALF_EVEN" + ); + @Override public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { @@ -60,9 +73,15 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro value = (NumericValue) item.convertTo(Type.NUMERIC); } - final RoundingMode roundingMode = getFunctionRoundingMode(value); + // Determine rounding mode: 3-arg form overrides the function default + final RoundingMode roundingMode; + if (args.length > 2 && !args[2].isEmpty()) { + roundingMode = parseRoundingMode(args[2].getStringValue(), value); + } else { + roundingMode = getFunctionRoundingMode(value); + } - if (args.length > 1) { + if (args.length > 1 && !args[1].isEmpty()) { final Item precisionItem = args[1].itemAt(0); if (precisionItem instanceof IntegerValue precision) { return convertValue(precision, value, roundingMode, this); @@ -72,6 +91,27 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro return convertValue(IntegerValue.ZERO, value, roundingMode, this); } + private RoundingMode parseRoundingMode(final String mode, final NumericValue value) throws XPathException { + // XQ4 rounding modes that map directly to Java RoundingMode + switch (mode) { + case "floor": return RoundingMode.FLOOR; + case "ceiling": return RoundingMode.CEILING; + case "toward-zero": return RoundingMode.DOWN; + case "away-from-zero": return RoundingMode.UP; + case "half-to-even": return RoundingMode.HALF_EVEN; + case "half-away-from-zero": return RoundingMode.HALF_UP; + case "half-toward-zero": return RoundingMode.HALF_DOWN; + // half-to-floor and half-to-ceiling need special handling based on sign + case "half-to-floor": + return value.isNegative() ? RoundingMode.HALF_UP : RoundingMode.HALF_DOWN; + case "half-to-ceiling": + return value.isNegative() ? RoundingMode.HALF_DOWN : RoundingMode.HALF_UP; + default: + throw new XPathException(this, ErrorCodes.XPTY0004, + "Unknown rounding mode: '" + mode + "'"); + } + } + /** * Apply necessary conversions to/from decimal to perform rounding in decimal * diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundHalfToEven.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundHalfToEven.java index 3846d834e05..55f63883aff 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundHalfToEven.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunRoundHalfToEven.java @@ -81,9 +81,9 @@ public class FunRoundHalfToEven extends FunRoundBase { public static final FunctionSignature[] FN_ROUND_HALF_TO_EVEN_SIGNATURES = { functionSignature(FN_NAME, FunRoundHalfToEven.FUNCTION_DESCRIPTION_1_PARAM + FunRoundHalfToEven.FUNCTION_DESCRIPTION_COMMON, FunRoundHalfToEven.RETURN_TYPE, - optParam("arg", Type.NUMERIC, "The input number")), + optParam("value", Type.NUMERIC, "The input number")), functionSignature(FN_NAME, FunRoundHalfToEven.FUNCTION_DESCRIPTION_2_PARAM + FunRoundHalfToEven.FUNCTION_DESCRIPTION_COMMON, RETURN_TYPE, - optParam("arg", Type.NUMERIC, "The input number"), + optParam("value", Type.NUMERIC, "The input number"), optParam("precision", Type.INTEGER, "Precision to round to")) }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java index 24d6c89ddf6..76fa07d0057 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java @@ -35,6 +35,8 @@ import org.w3c.dom.Element; import org.xml.sax.SAXException; +import javax.xml.transform.OutputKeys; + import java.io.IOException; import java.io.StringWriter; import java.util.Properties; @@ -51,7 +53,7 @@ public class FunSerialize extends BasicFunction { "This function serializes the supplied input sequence $arg as described in XSLT and XQuery Serialization 3.0, returning the " + "serialized representation of the sequence as a string.", new SequenceType[] { - new FunctionParameterSequenceType("args", Type.ITEM, Cardinality.ZERO_OR_MORE, "The node set to serialize") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The node set to serialize") }, new FunctionParameterSequenceType("result", Type.STRING, Cardinality.EXACTLY_ONE, "the string containing the serialized node set.") ), @@ -60,8 +62,8 @@ public class FunSerialize extends BasicFunction { "This function serializes the supplied input sequence $arg as described in XSLT and XQuery Serialization 3.0, returning the " + "serialized representation of the sequence as a string.", new SequenceType[] { - new FunctionParameterSequenceType("args", Type.ITEM, Cardinality.ZERO_OR_MORE, "The node set to serialize"), - new FunctionParameterSequenceType("parameters", Type.ITEM, Cardinality.ZERO_OR_ONE, "The serialization parameters as either a output:serialization-parameters element or a map") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The node set to serialize"), + new FunctionParameterSequenceType("options", Type.ITEM, Cardinality.ZERO_OR_ONE, "The serialization parameters as either a output:serialization-parameters element or a map") }, new FunctionParameterSequenceType("result", Type.STRING, Cardinality.EXACTLY_ONE, "the string containing the serialized node set.") ) @@ -80,6 +82,9 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce outputProperties = new Properties(); } + // SEPM0009: validate parameter consistency before serializing + validateSerializationParams(outputProperties); + try(final StringWriter writer = new StringWriter()) { final XQuerySerializer xqSerializer = new XQuerySerializer(context.getBroker(), outputProperties, writer); @@ -130,6 +135,42 @@ private static boolean isSerializationParametersElement(final Item item) { } } + /** + * Check if a serialization boolean parameter value is true. + * W3C Serialization 3.1 accepts "yes", "true", "1" (with optional whitespace) as true. + */ + private static boolean isBooleanTrue(final String value) { + if (value == null) { + return false; + } + final String trimmed = value.trim(); + return "yes".equals(trimmed) || "true".equals(trimmed) || "1".equals(trimmed); + } + + /** + * Validate serialization parameter consistency per W3C Serialization 3.1. + * Throws SEPM0009 if omit-xml-declaration=yes conflicts with standalone or + * version+doctype-system. + */ + private void validateSerializationParams(final Properties props) throws XPathException { + final String omitXmlDecl = props.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + if (isBooleanTrue(omitXmlDecl)) { + // SEPM0009: standalone must be omit (absent) when omit-xml-declaration=yes + final String standalone = props.getProperty(OutputKeys.STANDALONE); + if (standalone != null) { + throw new XPathException(this, ErrorCodes.SEPM0009, + "omit-xml-declaration is yes but standalone is set to '" + standalone + "'"); + } + // SEPM0009: version != 1.0 with doctype-system when omit-xml-declaration=yes + final String version = props.getProperty(OutputKeys.VERSION); + final String doctypeSystem = props.getProperty(OutputKeys.DOCTYPE_SYSTEM); + if (version != null && !"1.0".equals(version) && doctypeSystem != null) { + throw new XPathException(this, ErrorCodes.SEPM0009, + "omit-xml-declaration is yes with version '" + version + "' and doctype-system set"); + } + } + } + /** * Sequence normalization as described in * XSLT and XQuery Serialization 3.0 - Sequence Normalization. diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStrLength.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStrLength.java index dbc3eac6b3e..706ed7a5c59 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStrLength.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStrLength.java @@ -55,7 +55,7 @@ public class FunStrLength extends Function { "If the value of $arg is the empty sequence, the xs:integer 0 is returned.\n" + "If no argument is supplied, $arg defaults to the string value (calculated using fn:string()) of the context item (.). If no argument is supplied or if the argument is the context item and the context item is undefined an error is raised", new SequenceType[]{ - new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string") + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string") }, new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, "the length in characters") ) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunString.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunString.java index af79dd4ac02..c81469b4b10 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunString.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunString.java @@ -58,7 +58,7 @@ public class FunString extends Function { "If the value of $arg is the empty sequence, the zero-length string is returned. " + "If the context item of $arg is undefined, an error is raised.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_ONE, "The sequence to get the value of as an xs:string")}, + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The sequence to get the value of as an xs:string")}, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the value of $arg as an xs:string") ) }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringJoin.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringJoin.java index dfd3c4e639c..71a63a21233 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringJoin.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringJoin.java @@ -55,7 +55,7 @@ public class FunStringJoin extends BasicFunction { "The effect of calling the single-argument version of this function is the same as calling the " + "two-argument version with $separator set to a zero-length string.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The sequence to be joined to form the string. If it is empty, " + "a zero-length string is returned.") }, @@ -66,7 +66,7 @@ public class FunStringJoin extends BasicFunction { "$arg sequence using $separator as a separator. If the value of the separator is the zero-length " + "string, then the members of the sequence are concatenated without a separator.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The sequence to be joined to form the string. If it is empty, " + "a zero-length string is returned."), new FunctionParameterSequenceType("separator", Type.STRING, Cardinality.EXACTLY_ONE, "The separator to be placed in the string between the items of $arg") diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringToCodepoints.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringToCodepoints.java index 9b11b8ca6c5..9b40beeccac 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringToCodepoints.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunStringToCodepoints.java @@ -41,7 +41,7 @@ public class FunStringToCodepoints extends BasicFunction { "Returns the sequence of unicode code points that constitute an xs:string. If $arg is a zero-length " + "string or the empty sequence, the empty sequence is returned.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"), + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"), }, new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, "the sequence of code points")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubSequence.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubSequence.java index e66fbc857ec..88375870e7c 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubSequence.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubSequence.java @@ -41,8 +41,8 @@ public class FunSubSequence extends Function { + "items starting at the position, $starting-at, " + "up to the end of the sequence are included.", new SequenceType[]{ - new FunctionParameterSequenceType("source", Type.ITEM, Cardinality.ZERO_OR_MORE, "The source sequence"), - new FunctionParameterSequenceType("starting-at", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position in the $source") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The source sequence"), + new FunctionParameterSequenceType("start", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position in the $source") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the subsequence")), new FunctionSignature( @@ -51,8 +51,8 @@ public class FunSubSequence extends Function { + "starting at the position, $starting-at, " + "including the number of items indicated by $length.", new SequenceType[]{ - new FunctionParameterSequenceType("source", Type.ITEM, Cardinality.ZERO_OR_MORE, "The source sequence"), - new FunctionParameterSequenceType("starting-at", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position in the $source"), + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The source sequence"), + new FunctionParameterSequenceType("start", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position in the $source"), new FunctionParameterSequenceType("length", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The length of the subsequence") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the subsequence"))}; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstring.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstring.java index 5c87b3792be..fe48962bf60 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstring.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstring.java @@ -58,8 +58,8 @@ public class FunSubstring extends Function { "is zero or negative, only those characters in positions greater than zero are returned." + "If the value of $source is the empty sequence, the zero-length string is returned.", new SequenceType[] { - new FunctionParameterSequenceType("source", Type.STRING, Cardinality.ZERO_OR_ONE, "The source string"), - new FunctionParameterSequenceType("starting-at", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position") + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The source string"), + new FunctionParameterSequenceType("start", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the substring") ), @@ -70,8 +70,8 @@ public class FunSubstring extends Function { "beyond the end of $source. If $starting-at is zero or negative, only those characters in positions greater " + "than zero are returned. If the value of $source is the empty sequence, the zero-length string is returned.", new SequenceType[] { - new FunctionParameterSequenceType("source", Type.STRING, Cardinality.ZERO_OR_ONE, "The source string"), - new FunctionParameterSequenceType("starting-at", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position"), + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The source string"), + new FunctionParameterSequenceType("start", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The starting position"), new FunctionParameterSequenceType("length", Type.DOUBLE, Cardinality.EXACTLY_ONE, "The number of characters in the substring") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the substring") diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringAfter.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringAfter.java index 84faa6d2a10..12ad0f104fd 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringAfter.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringAfter.java @@ -47,9 +47,9 @@ */ public class FunSubstringAfter extends CollatingFunction { - protected static final FunctionParameterSequenceType COLLATION_ARG = new FunctionParameterSequenceType("collation-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI"); - protected static final FunctionParameterSequenceType SEARCH_ARG = new FunctionParameterSequenceType("search", Type.STRING, Cardinality.ZERO_OR_ONE, "The search string"); - protected static final FunctionParameterSequenceType SOURCE_ARG = new FunctionParameterSequenceType("source", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"); + protected static final FunctionParameterSequenceType COLLATION_ARG = new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI"); + protected static final FunctionParameterSequenceType SEARCH_ARG = new FunctionParameterSequenceType("substring", Type.STRING, Cardinality.ZERO_OR_ONE, "The search string"); + protected static final FunctionParameterSequenceType SOURCE_ARG = new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"); public final static FunctionSignature[] signatures = { new FunctionSignature( new QName("substring-after", Function.BUILTIN_FUNCTION_NS), diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringBefore.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringBefore.java index 914a1c6953a..78310be30a2 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringBefore.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSubstringBefore.java @@ -47,9 +47,9 @@ */ public class FunSubstringBefore extends CollatingFunction { - protected static final FunctionParameterSequenceType COLLATOR_ARG = new FunctionParameterSequenceType("collation-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI"); - protected static final FunctionParameterSequenceType SEARCH_ARG = new FunctionParameterSequenceType("search", Type.STRING, Cardinality.ZERO_OR_ONE, "The search string"); - protected static final FunctionParameterSequenceType SOURCE_ARG = new FunctionParameterSequenceType("source", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"); + protected static final FunctionParameterSequenceType COLLATOR_ARG = new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.EXACTLY_ONE, "The collation URI"); + protected static final FunctionParameterSequenceType SEARCH_ARG = new FunctionParameterSequenceType("substring", Type.STRING, Cardinality.ZERO_OR_ONE, "The search string"); + protected static final FunctionParameterSequenceType SOURCE_ARG = new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"); public final static FunctionSignature[] signatures = { new FunctionSignature( new QName("substring-before", Function.BUILTIN_FUNCTION_NS), diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSum.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSum.java index d333af4718a..281695f33fb 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSum.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSum.java @@ -55,7 +55,7 @@ public class FunSum extends Function { "Returns a value obtained by adding together the values in $arg. " + "If $arg is the the empty sequence the xs:double value 0.0e0 is returned.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The sequence of numbers to be summed up")}, + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The sequence of numbers to be summed up")}, new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "the sum of all numbers in $arg") ), new FunctionSignature( @@ -63,8 +63,8 @@ public class FunSum extends Function { "Returns a value obtained by adding together the values in $arg. " + "If $arg is the the empty sequence then $default is returned.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The sequence of numbers to be summed up"), - new FunctionParameterSequenceType("default", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "The default value if $arg computes to the empty sequence") + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The sequence of numbers to be summed up"), + new FunctionParameterSequenceType("zero", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "The default value if $arg computes to the empty sequence") }, new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_ONE, "the sum of all numbers in $arg") ) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java index f31b8b645f0..c2d4a17e30f 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java @@ -44,8 +44,9 @@ public class FunTokenize extends BasicFunction { private static final QName FS_TOKENIZE_NAME = new QName("tokenize", Function.BUILTIN_FUNCTION_NS); - private final static FunctionParameterSequenceType FS_TOKENIZE_PARAM_INPUT = optParam("input", Type.STRING, "The input string"); - private final static FunctionParameterSequenceType FS_TOKENIZE_PARAM_PATTERN = param("pattern", Type.STRING, "The tokenization pattern"); + private final static FunctionParameterSequenceType FS_TOKENIZE_PARAM_INPUT = optParam("value", Type.STRING, "The input string"); + private final static FunctionParameterSequenceType FS_TOKENIZE_PARAM_PATTERN = + new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.ZERO_OR_ONE, "The tokenization pattern"); public final static FunctionSignature[] FS_TOKENIZE = functionSignatures( FS_TOKENIZE_NAME, @@ -62,7 +63,7 @@ public class FunTokenize extends BasicFunction { arity( FS_TOKENIZE_PARAM_INPUT, FS_TOKENIZE_PARAM_PATTERN, - param("flags", Type.STRING,"The flags") + new FunctionParameterSequenceType("flags", Type.STRING, Cardinality.ZERO_OR_ONE, "The flags") ) ) ); @@ -82,47 +83,120 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro if (string.isEmpty()) { result = Sequence.EMPTY_SEQUENCE; } else { - final int flags; - if (args.length == 3) { - flags = parseFlags(this, args[2].itemAt(0).getStringValue()); - } else { - flags = 0; - } + // XQ4: pattern can be empty sequence — treat as 1-arg whitespace form + final boolean useWhitespaceTokenization = args.length == 1 + || (args.length >= 2 && args[1].isEmpty()); - final String pattern; - if (args.length == 1) { - pattern = " "; + if (useWhitespaceTokenization) { string = FunNormalizeSpace.normalize(string); + if (string.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String[] tokens = string.split(" "); + result = new ValueSequence(); + for (final String token : tokens) { + result.add(new StringValue(this, token)); + } } else { - if(hasLiteral(flags)) { - // no need to change anything - pattern = args[1].itemAt(0).getStringValue(); + // XQ4: flags can be empty sequence + String flagsStr = ""; + if (args.length == 3 && !args[2].isEmpty()) { + flagsStr = args[2].itemAt(0).getStringValue(); + } + + // XQ4: '!' flag — XPath mode (allows empty matches, etc.) + final boolean hasXPathFlag = flagsStr.contains("!"); + if (hasXPathFlag) { + flagsStr = flagsStr.replace("!", ""); + } + + // XQ4: 'c' flag — strip regex comments + final boolean hasCommentFlag = flagsStr.indexOf('c') >= 0 && flagsStr.indexOf('q') < 0; + if (flagsStr.indexOf('c') >= 0) { + flagsStr = flagsStr.replace("c", ""); + } + final int flags = parseFlags(this, flagsStr); + + String rawPattern = args[1].itemAt(0).getStringValue(); + if (hasCommentFlag) { + rawPattern = FunReplace.stripRegexComments(rawPattern); + } + final String pattern; + if (hasLiteral(flags)) { + pattern = rawPattern; } else { final boolean ignoreWhitespace = hasIgnoreWhitespace(flags); final boolean caseBlind = hasCaseInsensitive(flags); - pattern = translateRegexp(this, args[1].itemAt(0).getStringValue(), ignoreWhitespace, caseBlind); + pattern = translateRegexp(this, rawPattern, ignoreWhitespace, caseBlind); } - } - try { - final Pattern pat = PatternFactory.getInstance().getPattern(pattern, flags); - if (pat.matcher("").matches()) { - throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); + try { + final Pattern pat = PatternFactory.getInstance().getPattern(pattern, flags); + final boolean canMatchEmpty = pat.matcher("").matches(); + final boolean allowEmptyMatch = hasXPathFlag || context.getXQueryVersion() >= 40; + + if (canMatchEmpty && !allowEmptyMatch) { + throw new XPathException(this, ErrorCodes.FORX0003, + "Regular expression matches zero-length string"); + } + + if (canMatchEmpty) { + // XQ4: empty-matching regex allowed — tokenize between each character + result = tokenizeEmptyMatch(string, pat); + } else { + final String[] tokens = pat.split(string, -1); + result = new ValueSequence(); + for (final String token : tokens) { + result.add(new StringValue(this, token)); + } + } + + } catch (final PatternSyntaxException e) { + throw new XPathException(this, ErrorCodes.FORX0001, "Invalid regular expression: " + e.getMessage(), new StringValue(this, pattern), e); } + } + } + } - final String[] tokens = pat.split(string, -1); - result = new ValueSequence(); + return result; + } - for (final String token : tokens) { - result.add(new StringValue(this, token)); - } + /** + * XQ4: Handle tokenization when the regex matches the empty string. + * Per spec: zero-length matches at start/end of string do not produce + * leading/trailing empty tokens. Empty matches advance past one character. + */ + private Sequence tokenizeEmptyMatch(final String input, final Pattern pat) throws XPathException { + final ValueSequence result = new ValueSequence(); + final java.util.regex.Matcher matcher = pat.matcher(input); + int lastEnd = 0; + while (matcher.find()) { + final boolean isEmpty = matcher.start() == matcher.end(); + + // Skip zero-length match at end of string + if (isEmpty && matcher.start() >= input.length()) { + break; + } + + // Add token: text from end of last match to start of this match + result.add(new StringValue(this, input.substring(lastEnd, matcher.start()))); + lastEnd = matcher.end(); - } catch (final PatternSyntaxException e) { - throw new XPathException(this, ErrorCodes.FORX0001, "Invalid regular expression: " + e.getMessage(), new StringValue(this, pattern), e); + // For empty match, advance matcher past one character to prevent infinite loop. + // The skipped character becomes part of the next token (not consumed). + if (isEmpty) { + final int nextPos = lastEnd + 1; + if (nextPos <= input.length()) { + matcher.region(nextPos, input.length()); + } else { + break; } } } - + // Add trailing token + if (lastEnd <= input.length()) { + result.add(new StringValue(this, input.substring(lastEnd))); + } return result; } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTrace.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTrace.java index e6c78e43828..c7f8c6f3190 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTrace.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTrace.java @@ -42,7 +42,7 @@ public class FunTrace extends BasicFunction { - private static final FunctionParameterSequenceType FS_PARAM_VALUE = optManyParam("value", Type.ITEM, "The values"); + private static final FunctionParameterSequenceType FS_PARAM_VALUE = optManyParam("input", Type.ITEM, "The values"); private static final FunctionParameterSequenceType FS_PARAM_LABEL = param("label", Type.STRING, "The label in the log file"); private static final String FS_TRACE_NAME = "trace"; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTranslate.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTranslate.java index c13a449b0e5..1a5a37302b0 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTranslate.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTranslate.java @@ -57,10 +57,10 @@ public class FunTranslate extends Function { "string $arg is returned.\n\nIf a character occurs more than once in $map, then the first occurrence determines " + "the replacement character. If $trans is longer than $map, the excess characters are ignored.\n\n" + "i.e. fn:translate(\"bar\",\"abc\",\"ABC\") returns \"BAr\"", - new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to be translated"), - new FunctionParameterSequenceType("map", Type.STRING, Cardinality.EXACTLY_ONE, "The map string"), - new FunctionParameterSequenceType("trans", Type.STRING, Cardinality.EXACTLY_ONE, "The translation string") + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to be translated"), + new FunctionParameterSequenceType("replace", Type.STRING, Cardinality.EXACTLY_ONE, "The map string"), + new FunctionParameterSequenceType("with", Type.STRING, Cardinality.EXACTLY_ONE, "The translation string") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the translated string")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnordered.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnordered.java index 4d4c448fdcf..790eedeb97b 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnordered.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnordered.java @@ -52,7 +52,7 @@ public class FunUnordered extends Function { "Takes a sequence $arg as input and returns an arbitrary implementation dependent permutation " + "of it. Currently, this has no effect in eXist, but it might be used for future optimizations.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the input sequence in an arbitrary implementation dependent permutation")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnparsedText.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnparsedText.java index e4e134e9919..21b2b837373 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnparsedText.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUnparsedText.java @@ -37,14 +37,15 @@ import java.net.URISyntaxException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.nio.charset.UnsupportedCharsetException; import static java.nio.charset.StandardCharsets.UTF_8; import static org.exist.xquery.FunctionDSL.*; public class FunUnparsedText extends BasicFunction { - private final static FunctionParameterSequenceType PARAM_HREF = optParam("href", Type.STRING, "the URI to load text from"); - private final static FunctionParameterSequenceType PARAM_ENCODING = param("encoding", Type.STRING, "character encoding of the resource"); + private final static FunctionParameterSequenceType PARAM_HREF = optParam("source", Type.STRING, "the URI to load text from"); + private final static FunctionParameterSequenceType PARAM_ENCODING = param("options", Type.STRING, "character encoding of the resource"); static final FunctionSignature [] FS_UNPARSED_TEXT = functionSignatures( new QName("unparsed-text", Function.BUILTIN_FUNCTION_NS), @@ -96,7 +97,7 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce private boolean contentAvailable(final String uri, final String encoding) { final Charset charset; try { - charset = encoding != null ? Charset.forName(encoding) : UTF_8; + charset = encoding != null ? resolveCharset(encoding) : UTF_8; } catch (final IllegalArgumentException e) { return false; } @@ -120,7 +121,7 @@ private boolean contentAvailable(final String uri, final String encoding) { private String readContent(final String uri, final String encoding) throws XPathException { final Charset charset; try { - charset = encoding != null ? Charset.forName(encoding) : UTF_8; + charset = encoding != null ? resolveCharset(encoding) : UTF_8; } catch (final IllegalArgumentException e) { throw new XPathException(this, ErrorCodes.FOUT1190, e.getMessage()); } @@ -199,7 +200,7 @@ private Charset getCharset(final String encoding, final Source source) throws XP } } else { try { - charset = Charset.forName(encoding); + charset = resolveCharset(encoding); } catch (final IllegalArgumentException e) { throw new XPathException(this, ErrorCodes.FOUT1190, e.getMessage()); } @@ -207,14 +208,59 @@ private Charset getCharset(final String encoding, final Source source) throws XP return charset; } + /** + * Resolve a charset name, mapping common aliases that Java doesn't recognize. + */ + private static Charset resolveCharset(final String encoding) { + try { + return Charset.forName(encoding); + } catch (final UnsupportedCharsetException e) { + if ("iso-8859".equalsIgnoreCase(encoding)) { + return Charset.forName("iso-8859-1"); + } + throw e; + } + } + private Source getSource(final String uriParam) throws XPathException { try { - final URI uri = new URI(uriParam); + URI uri = new URI(uriParam); if (uri.getFragment() != null) { throw new XPathException(this, ErrorCodes.FOUT1170, "href argument may not contain fragment identifier"); } - final Source source = SourceFactory.getSource(context.getBroker(), "", uri.toASCIIString(), false); + // Resolve relative URIs against file: base URI directory + boolean resolvedFromBaseUri = false; + if (!uri.isAbsolute()) { + final AnyURIValue baseXdmUri = context.getBaseURI(); + if (baseXdmUri != null && !baseXdmUri.equals(AnyURIValue.EMPTY_URI)) { + String baseStr = baseXdmUri.toURI().toString(); + if (baseStr.startsWith("file:")) { + final int lastSlash = baseStr.lastIndexOf('/'); + if (lastSlash >= 0) { + baseStr = baseStr.substring(0, lastSlash + 1); + } + uri = new URI(baseStr).resolve(uri); + resolvedFromBaseUri = true; + } + } + } + + final String resolvedUri = uri.toASCIIString(); + + // Only use direct file: access for URIs resolved from a relative path + // against a file: base URI. Absolute file: URIs (e.g., file:///etc/passwd) + // must go through SourceFactory which enforces security checks. + if (resolvedFromBaseUri && resolvedUri.startsWith("file:")) { + final String filePath = resolvedUri.replaceFirst("^file:(?://[^/]*)?", ""); + final java.nio.file.Path path = java.nio.file.Paths.get(filePath); + if (java.nio.file.Files.isReadable(path)) { + return new FileSource(path, false); + } + throw new XPathException(this, ErrorCodes.FOUT1170, "Could not find source for: " + uriParam); + } + + final Source source = SourceFactory.getSource(context.getBroker(), "", resolvedUri, false); if (source == null) { throw new XPathException(this, ErrorCodes.FOUT1170, "Could not find source for: " + uriParam); } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUpperOrLowerCase.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUpperOrLowerCase.java index 6a5f576a98a..3816c2b68b7 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUpperOrLowerCase.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUpperOrLowerCase.java @@ -46,14 +46,14 @@ public class FunUpperOrLowerCase extends Function { new FunctionSignature( new QName("upper-case", Function.BUILTIN_FUNCTION_NS), "Returns the value of $arg after translating every character to its upper-case correspondent as defined in the appropriate case mappings section in the Unicode standard. For versions of Unicode beginning with the 2.1.8 update, only locale-insensitive case mappings should be applied. Beginning with version 3.2.0 (and likely future versions) of Unicode, precise mappings are described in default case operations, which are full case mappings in the absence of tailoring for particular languages and environments. Every lower-case character that does not have an upper-case correspondent, as well as every upper-case character, is included in the returned value in its original form.", - new SequenceType[] { new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The text to be converted to all upper-case characters") }, + new SequenceType[] { new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The text to be converted to all upper-case characters") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the resulting upper-case text")); public final static FunctionSignature fnLowerCase = new FunctionSignature( new QName("lower-case", Function.BUILTIN_FUNCTION_NS), "Returns the value of $arg after translating every character to its lower-case correspondent as defined in the appropriate case mappings section in the Unicode standard. For versions of Unicode beginning with the 2.1.8 update, only locale-insensitive case mappings should be applied. Beginning with version 3.2.0 (and likely future versions) of Unicode, precise mappings are described in default case operations, which are full case mappings in the absence of tailoring for particular languages and environments. Every upper-case character that does not have a lower-case correspondent, as well as every lower-case character, is included in the returned value in its original form.", - new SequenceType[] { new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The text to be converted to all lower-case characters") }, + new SequenceType[] { new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The text to be converted to all lower-case characters") }, new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the resulting lower-case text")); public FunUpperOrLowerCase(XQueryContext context, FunctionSignature signature) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java index 3b9426af31e..3a296a88c77 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java @@ -48,7 +48,7 @@ public class FunUriCollection extends BasicFunction { private static final FunctionReturnSequenceType FN_RETURN = returnsOptMany(Type.ANY_URI, "the default URI collection, if $arg is not specified or is an empty sequence, " + "or the sequence of URIs that correspond to the supplied URI"); - private static final FunctionParameterSequenceType ARG = optParam("arg", Type.STRING, + private static final FunctionParameterSequenceType ARG = optParam("source", Type.STRING, "An xs:string identifying a URI Collection. " + "The argument is interpreted as either an absolute xs:anyURI, or a relative xs:anyURI resolved " + "against the base-URI property from the static context. In eXist-db this function consults the " + diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java index b1e3c09890f..2dc4ab4b503 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java @@ -36,6 +36,7 @@ import java.io.Writer; import java.math.BigDecimal; import java.util.ArrayList; +import java.util.Set; import static org.exist.xquery.FunctionDSL.*; @@ -99,7 +100,160 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro * @param writer the Writer to be used * @throws XPathException on error in XML JSON input according to specification */ + private static final Set JSON_ELEMENT_NAMES = Set.of("map", "array", "null", "boolean", "number", "string"); + private void nodeValueToJson(final NodeValue nodeValue, final Writer writer) throws XPathException { + // If the input is an element node (not a document), use DOM-based conversion + // to avoid XMLStreamReader traversing the entire owner document + if (nodeValue.getType() == Type.ELEMENT) { + elementToJson(nodeValue, writer); + return; + } + + documentToJson(nodeValue, writer); + } + + private void documentToJson(final NodeValue nodeValue, final Writer writer) throws XPathException { + // For document nodes, find the first child element and convert it + final org.w3c.dom.Node docNode = nodeValue.getNode(); + org.w3c.dom.Node child = docNode.getFirstChild(); + while (child != null && child.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) { + child = child.getNextSibling(); + } + if (child == null) { + throw new XPathException(this, ErrorCodes.FOJS0006, "Invalid XML representation of JSON. Document has no element child."); + } + elementToJson((NodeValue) child, writer); + } + + private void elementToJson(final NodeValue nodeValue, final Writer writer) throws XPathException { + final org.w3c.dom.Element element = (org.w3c.dom.Element) nodeValue.getNode(); + final JsonFactory jsonFactory = new JsonFactory(); + try (final JsonGenerator jsonGenerator = jsonFactory.createGenerator(writer)) { + writeJsonElement(element, jsonGenerator); + } catch (final IOException e) { + throw new XPathException(this, ErrorCodes.FOER0000, e.getMessage(), e); + } + } + + private void writeJsonElement(final org.w3c.dom.Element element, final JsonGenerator gen) throws XPathException, IOException { + final String localName = element.getLocalName() != null ? element.getLocalName() : element.getTagName(); + + if (!JSON_ELEMENT_NAMES.contains(localName)) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Found XML element which is not one of [map, array, null, boolean, number, string]."); + } + + switch (localName) { + case "map": + gen.writeStartObject(); + final org.w3c.dom.NodeList mapChildren = element.getChildNodes(); + final java.util.Set seenKeys = new java.util.HashSet<>(); + for (int i = 0; i < mapChildren.getLength(); i++) { + final org.w3c.dom.Node child = mapChildren.item(i); + if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + final org.w3c.dom.Element childElem = (org.w3c.dom.Element) child; + final String keyValue = getKeyAttribute(childElem); + if (keyValue == null) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Map entry missing 'key' attribute."); + } + if (!seenKeys.add(keyValue)) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Duplicate key '" + keyValue + "' in map."); + } + gen.writeFieldName(keyValue); + writeJsonElement(childElem, gen); + } + } + gen.writeEndObject(); + break; + + case "array": + gen.writeStartArray(); + final org.w3c.dom.NodeList arrayChildren = element.getChildNodes(); + for (int i = 0; i < arrayChildren.getLength(); i++) { + final org.w3c.dom.Node child = arrayChildren.item(i); + if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + writeJsonElement((org.w3c.dom.Element) child, gen); + } + } + gen.writeEndArray(); + break; + + case "string": + final String strContent = getTextContent(element); + final boolean escaped = "true".equals(element.getAttribute("escaped")); + if (escaped) { + try { + gen.writeString(unescapeEscapedJsonString(strContent)); + } catch (final IOException e) { + throw new XPathException(this, ErrorCodes.FOJS0007, "Bad JSON escape sequence."); + } + } else { + gen.writeString(strContent); + } + break; + + case "number": + final String numStr = getTextContent(element); + try { + gen.writeNumber(new java.math.BigDecimal(numStr)); + } catch (final NumberFormatException e) { + throw new XPathException(this, ErrorCodes.FOJS0006, "Cannot convert '" + numStr + "' to a number."); + } + break; + + case "boolean": + final String boolStr = getTextContent(element); + final boolean boolVal = !("0".equals(boolStr) || "false".equals(boolStr) || boolStr.isEmpty()); + gen.writeBoolean(boolVal); + break; + + case "null": + final String nullContent = getTextContent(element); + if (!nullContent.isEmpty()) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Found non-empty XML null element."); + } + gen.writeNull(); + break; + } + } + + private String getKeyAttribute(final org.w3c.dom.Element element) throws XPathException { + final String escapedKey = element.getAttribute("escaped-key"); + // getAttribute returns "" for missing attributes, so check hasAttribute + if (!element.hasAttribute("key")) { + return null; + } + final String key = element.getAttribute("key"); + if ("true".equals(escapedKey)) { + try { + return unescapeEscapedJsonString(key); + } catch (final IOException e) { + throw new XPathException(this, ErrorCodes.FOJS0007, "Bad JSON escape sequence in key."); + } + } + return key; + } + + private String getTextContent(final org.w3c.dom.Element element) { + final StringBuilder sb = new StringBuilder(); + final org.w3c.dom.NodeList children = element.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + final org.w3c.dom.Node child = children.item(i); + if (child.getNodeType() == org.w3c.dom.Node.TEXT_NODE + || child.getNodeType() == org.w3c.dom.Node.CDATA_SECTION_NODE) { + sb.append(child.getTextContent()); + } + } + return sb.toString(); + } + + // Keep the old XMLStreamReader-based method for reference but it's no longer called + @SuppressWarnings("unused") + private void nodeValueToJsonViaStream(final NodeValue nodeValue, final Writer writer) throws XPathException { final StringBuilder tempStringBuilder = new StringBuilder(); final JsonFactory jsonFactory = new JsonFactory(); final Integer stackSeparator = 0; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunZeroOrOne.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunZeroOrOne.java index ceeb5697425..4d2b8a442ab 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunZeroOrOne.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunZeroOrOne.java @@ -50,7 +50,7 @@ public class FunZeroOrOne extends Function { "Returns the argument sequence $arg if it contains zero or one items. Otherwise, " + "raises an error.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to be tested for cardinality") + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to be tested for cardinality") }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_ONE, "the input sequence if it contains zero or one items.")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java index 42912e0569d..541728f3808 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java @@ -27,6 +27,7 @@ import org.exist.Namespaces; import org.exist.dom.QName; import org.exist.dom.memtree.MemTreeBuilder; +import org.exist.xquery.value.BooleanValue; import org.exist.security.PermissionDeniedException; import org.exist.source.Source; import org.exist.source.SourceFactory; @@ -49,8 +50,8 @@ */ public class JSON extends BasicFunction { - private static final FunctionParameterSequenceType FS_PARAM_JSON_TEXT = optParam("json-text", Type.STRING, "JSON text as defined in [RFC 7159]. The function parses this string to return an XDM value"); - private static final FunctionParameterSequenceType FS_PARAM_HREF = optParam("href", Type.STRING,"URL pointing to a JSON resource"); + private static final FunctionParameterSequenceType FS_PARAM_JSON_TEXT = optParam("value", Type.STRING, "JSON text as defined in [RFC 7159]. The function parses this string to return an XDM value"); + private static final FunctionParameterSequenceType FS_PARAM_HREF = optParam("source", Type.STRING,"URL pointing to a JSON resource"); private static final FunctionParameterSequenceType FS_PARAM_OPTIONS = param("options", Type.MAP_ITEM, "Parsing options"); private static final String FS_PARSE_JSON_NAME = "parse-json"; @@ -124,15 +125,30 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce // TODO: jackson does not allow access to raw string, so option "unescape" is not supported boolean liberal = false; String handleDuplicates = OPTION_DUPLICATES_USE_LAST; - if (getArgumentCount() == 2) { - final MapType options = (MapType)args[1].itemAt(0); + if (getArgumentCount() == 2 && !args[1].isEmpty()) { + final Item optItem = args[1].itemAt(0); + if (optItem.getType() != Type.MAP_ITEM) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Expected map for options parameter, got " + Type.getTypeName(optItem.getType())); + } + final MapType options = (MapType) optItem; final Sequence liberalOpt = options.get(new StringValue(OPTION_LIBERAL)); if (liberalOpt.hasOne()) { - liberal = liberalOpt.itemAt(0).convertTo(Type.BOOLEAN).effectiveBooleanValue(); + final Item liberalItem = liberalOpt.itemAt(0); + if (liberalItem.getType() != Type.BOOLEAN) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Option 'liberal' must be a boolean, got " + Type.getTypeName(liberalItem.getType())); + } + liberal = ((BooleanValue) liberalItem).effectiveBooleanValue(); } final Sequence duplicateOpt = options.get(new StringValue(OPTION_DUPLICATES)); if (duplicateOpt.hasOne()) { - handleDuplicates = duplicateOpt.itemAt(0).getStringValue(); + final Item dupItem = duplicateOpt.itemAt(0); + if (!Type.subTypeOf(dupItem.getType(), Type.STRING)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Option 'duplicates' must be a string, got " + Type.getTypeName(dupItem.getType())); + } + handleDuplicates = dupItem.getStringValue(); } } @@ -212,24 +228,73 @@ private Sequence parseResource(Sequence href, String handleDuplicates, JsonFacto } try { String url = href.getStringValue(); + boolean resolvedFromBaseUri = false; if (url.indexOf(':') == Constants.STRING_NOT_FOUND) { - url = XmldbURI.EMBEDDED_SERVER_URI_PREFIX + url; + // Relative URI: resolve against static base URI + final String resolved = resolveAgainstBaseUri(url); + if (resolved != null && resolved.startsWith("file:")) { + url = resolved; + resolvedFromBaseUri = true; + } else { + url = XmldbURI.EMBEDDED_SERVER_URI_PREFIX + url; + } } + // Only use direct file: access for URIs resolved from a relative path. + // Absolute file: URIs go through SourceFactory for security. + if (resolvedFromBaseUri && url.startsWith("file:")) { + // Extract path from file: URI: file:/path, file://host/path, file:///path + final String filePath = url.replaceFirst("^file:(?://[^/]*)?", ""); + final java.nio.file.Path path = java.nio.file.Paths.get(filePath); + if (java.nio.file.Files.isReadable(path)) { + try (final InputStream is = java.nio.file.Files.newInputStream(path)) { + try (final JsonParser parser = factory.createParser(is)) { + final Item result = readValue(context, parser, handleDuplicates); + return result == null ? Sequence.EMPTY_SEQUENCE : result.toSequence(); + } catch (final IOException jsonErr) { + // JSON parsing error, not file I/O + throw new XPathException(this, ErrorCodes.FOJS0001, jsonErr.getMessage()); + } + } + } + throw new XPathException(this, ErrorCodes.FOUT1170, "failed to load json doc from file: " + filePath); + } + final Source source = SourceFactory.getSource(context.getBroker(), "", url, false); if (source == null) { throw new XPathException(this, ErrorCodes.FOUT1170, "failed to load json doc from URI " + url); } - try (final InputStream is = source.getInputStream(); - final JsonParser parser = factory.createParser(is)) { - - final Item result = readValue(context, parser, handleDuplicates); - return result == null ? Sequence.EMPTY_SEQUENCE : result.toSequence(); + try (final InputStream is = source.getInputStream()) { + try (final JsonParser parser = factory.createParser(is)) { + final Item result = readValue(context, parser, handleDuplicates); + return result == null ? Sequence.EMPTY_SEQUENCE : result.toSequence(); + } catch (final IOException jsonErr) { + throw new XPathException(this, ErrorCodes.FOJS0001, jsonErr.getMessage()); + } } } catch (IOException | PermissionDeniedException e) { throw new XPathException(this, ErrorCodes.FOUT1170, e.getMessage()); } } + private String resolveAgainstBaseUri(final String relativePath) { + try { + final AnyURIValue baseXdmUri = context.getBaseURI(); + if (baseXdmUri != null && !baseXdmUri.equals(AnyURIValue.EMPTY_URI)) { + String baseStr = baseXdmUri.toURI().toString(); + // Strip filename to get directory URI + final int lastSlash = baseStr.lastIndexOf('/'); + if (lastSlash >= 0) { + baseStr = baseStr.substring(0, lastSlash + 1); + } + final java.net.URI baseUri = new java.net.URI(baseStr); + return baseUri.resolve(relativePath).toString(); + } + } catch (final java.net.URISyntaxException | XPathException e) { + // fall through + } + return null; + } + /** * Generate an XDM from the tokens delivered by the JSON parser. * diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/LoadXQueryModule.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/LoadXQueryModule.java index f2d409ebeb9..138e962542d 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/LoadXQueryModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/LoadXQueryModule.java @@ -27,13 +27,18 @@ import io.lacuna.bifurcan.Map; import io.lacuna.bifurcan.Maps; import org.exist.dom.QName; +import org.exist.source.StringSource; import org.exist.xquery.*; import org.exist.xquery.Module; import org.exist.xquery.functions.map.AbstractMapType; import org.exist.xquery.functions.map.MapType; import org.exist.xquery.parser.XQueryAST; +import org.exist.xquery.parser.XQueryLexer; +import org.exist.xquery.parser.XQueryParser; +import org.exist.xquery.parser.XQueryTreeParser; import org.exist.xquery.value.*; +import java.io.Reader; import java.util.*; import static org.exist.xquery.functions.map.MapType.newLinearMap; @@ -98,6 +103,7 @@ public class LoadXQueryModule extends BasicFunction { public final static StringValue OPTIONS_VARIABLES = new StringValue("variables"); public final static StringValue OPTIONS_CONTEXT_ITEM = new StringValue("context-item"); public final static StringValue OPTIONS_VENDOR = new StringValue("vendor-options"); + public final static StringValue OPTIONS_CONTENT = new StringValue("content"); public final static StringValue RESULT_FUNCTIONS = new StringValue("functions"); public final static StringValue RESULT_VARIABLES = new StringValue("variables"); @@ -116,6 +122,7 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce String xqVersion = getXQueryVersion(context.getXQueryVersion()); AbstractMapType externalVars = new MapType(this, context); Sequence contextItem = Sequence.EMPTY_SEQUENCE; + String contentSource = null; // evaluate options if (getArgumentCount() == 2) { @@ -144,6 +151,12 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce throw new XPathException(this, ErrorCodes.XPTY0004, "Option 'context-item' must contain zero or one " + "items"); } + + // XQ4: content option — compile module from provided source string + final Sequence contentOption = map.get(OPTIONS_CONTENT); + if (!contentOption.isEmpty()) { + contentSource = contentOption.getStringValue(); + } } // create temporary context so main context is not polluted @@ -154,15 +167,21 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce tempContext.prepareForExecution(); Module[] loadedModules = null; - try { - loadedModules = tempContext.importModule(targetNamespace, null, locationHints); - } catch (final XPathException e) { - if (e.getErrorCode() == ErrorCodes.XQST0059) { - // importModule may throw exception if no location is given and module cannot be resolved - throw new XPathException(this, ErrorCodes.FOQM0002, "Module with URI " + targetNamespace + " not found"); + if (contentSource != null) { + // XQ4: compile module from content string + final ExternalModule contentModule = compileModuleFromContent( + targetNamespace, contentSource, tempContext); + loadedModules = new Module[] { contentModule }; + } else { + try { + loadedModules = tempContext.importModule(targetNamespace, null, locationHints); + } catch (final XPathException e) { + if (e.getErrorCode() == ErrorCodes.XQST0059) { + throw new XPathException(this, ErrorCodes.FOQM0002, "Module with URI " + targetNamespace + " not found"); + } + throw new XPathException(this, ErrorCodes.FOQM0003, "Error found when importing module: " + e.getMessage()); } - throw new XPathException(this, ErrorCodes.FOQM0003, "Error found when importing module: " + e.getMessage()); } // not found, raise error @@ -170,7 +189,8 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce throw new XPathException(this, ErrorCodes.FOQM0002, "Module with URI " + targetNamespace + " not found"); } - if (!xqVersion.equals(getXQueryVersion(tempContext.getXQueryVersion()))) { + // Only enforce version check for URI-loaded modules, not content-loaded + if (contentSource == null && !xqVersion.equals(getXQueryVersion(tempContext.getXQueryVersion()))) { throw new XPathException(this, ErrorCodes.FOQM0003, "Imported module has wrong XQuery version: " + getXQueryVersion(tempContext.getXQueryVersion())); } @@ -284,6 +304,36 @@ public static void addFunctionRefsFromModule(final Expression parent, final XQue } } + /** + * XQ4: Compile a library module from a content string. + * Uses XQueryContext.compileModuleFromSource() which handles all the + * parsing, AST walking, and module registration. + */ + private ExternalModule compileModuleFromContent(final String targetNamespace, + final String content, final XQueryContext tempContext) throws XPathException { + final StringSource source = new StringSource(content); + try { + final ExternalModule module = tempContext.compileModuleFromSource(targetNamespace, source); + if (module == null) { + throw new XPathException(this, ErrorCodes.FOQM0005, + "Content string is not a library module"); + } + // Verify the module's namespace matches the target + if (!module.getNamespaceURI().equals(targetNamespace)) { + throw new XPathException(this, ErrorCodes.FOQM0001, + "Module namespace '" + module.getNamespaceURI() + + "' does not match target namespace '" + targetNamespace + "'"); + } + return module; + } catch (final XPathException e) { + if (e.getErrorCode() == ErrorCodes.FOQM0001 || e.getErrorCode() == ErrorCodes.FOQM0005) { + throw e; + } + throw new XPathException(this, ErrorCodes.FOQM0003, + "Error compiling module content: " + e.getMessage(), e); + } + } + private static String getXQueryVersion(final int version) { return String.valueOf(version / 10) + '.' + version % 10; } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/ParsingFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/ParsingFunctions.java index 48a8353d83c..c1f9408cd61 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/ParsingFunctions.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/ParsingFunctions.java @@ -45,7 +45,7 @@ public class ParsingFunctions extends BasicFunction { Cardinality.ZERO_OR_ONE, "the document node with the parsed result"); protected static final FunctionParameterSequenceType TO_BE_PARSED_PARAMETER = new FunctionParameterSequenceType( - "arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to be parsed"); + "value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to be parsed"); protected static final Logger logger = LogManager.getLogger(ParsingFunctions.class); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/QNameFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/QNameFunctions.java index a9dd23ac682..5322ba14c6f 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/QNameFunctions.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/QNameFunctions.java @@ -51,7 +51,7 @@ public class QNameFunctions extends BasicFunction { "Returns an xs:NCName representing the prefix of $arg. If $arg is the empty " + "sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.QNAME, Cardinality.ZERO_OR_ONE, "The QName") + new FunctionParameterSequenceType("value", Type.QNAME, Cardinality.ZERO_OR_ONE, "The QName") }, new FunctionReturnSequenceType(Type.NCNAME, Cardinality.ZERO_OR_ONE, "the prefix")); @@ -61,7 +61,7 @@ public class QNameFunctions extends BasicFunction { "Returns an xs:NCName representing the local part of $arg. If $arg is the empty " + "sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.QNAME, Cardinality.ZERO_OR_ONE, "The QName") + new FunctionParameterSequenceType("value", Type.QNAME, Cardinality.ZERO_OR_ONE, "The QName") }, new FunctionReturnSequenceType(Type.NCNAME, Cardinality.ZERO_OR_ONE, "the local name")); @@ -71,7 +71,7 @@ public class QNameFunctions extends BasicFunction { "Returns the namespace URI for $arg. If $arg is the empty " + "sequence, returns the empty sequence.", new SequenceType[] { - new FunctionParameterSequenceType("arg", Type.QNAME, Cardinality.ZERO_OR_ONE, "The QName") + new FunctionParameterSequenceType("value", Type.QNAME, Cardinality.ZERO_OR_ONE, "The QName") }, new FunctionReturnSequenceType(Type.ANY_URI, Cardinality.ZERO_OR_ONE, "the namespace URI")); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/map/AbstractMapType.java b/exist-core/src/main/java/org/exist/xquery/functions/map/AbstractMapType.java index 0ea9d160dbf..c388d59b094 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/map/AbstractMapType.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/map/AbstractMapType.java @@ -186,7 +186,7 @@ protected static boolean sameKey(@Nullable final Collator collator, final Atomic // Both $k1 and $k2 are negative infinity // Note: xs:double('-INF') is the same key as xs:float('-INF') - if (((NumericValue) k1).isNegativeInfinity() && ((NumericValue) k2).isNegative()) { + if (((NumericValue) k1).isNegativeInfinity() && ((NumericValue) k2).isNegativeInfinity()) { return true; } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/map/MapExpr.java b/exist-core/src/main/java/org/exist/xquery/functions/map/MapExpr.java index def13868b4d..95f117c1e86 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/map/MapExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/map/MapExpr.java @@ -34,6 +34,7 @@ import static org.exist.xquery.functions.map.MapType.newLinearMap; + /** * Implements the literal syntax for creating maps. */ @@ -69,6 +70,7 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP contextSequence = contextItem.toSequence(); } final IMap map = newLinearMap(null); + final List keyOrder = new ArrayList<>(this.mappings.size()); boolean firstType = true; int prevType = AbstractMapType.UNKNOWN_KEY_TYPE; @@ -84,6 +86,7 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP throw new XPathException(this, ErrorCodes.XQDY0137, "Key \"" + atomic.getStringValue() + "\" already exists in map."); } map.put(atomic, value); + keyOrder.add(atomic); final int thisType = atomic.getType(); if (firstType) { @@ -96,7 +99,9 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP } } - return new MapType(this, context, map.forked(), prevType); + final MapType result = new MapType(this, context, map.forked(), prevType); + result.setInsertionOrder(keyOrder); + return result; } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/functions/map/MapFunction.java b/exist-core/src/main/java/org/exist/xquery/functions/map/MapFunction.java index 029caa0cdee..feb6fdc44ea 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/map/MapFunction.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/map/MapFunction.java @@ -169,6 +169,108 @@ public class MapFunction extends BasicFunction { ) ); + // --- XQuery 4.0 map functions --- + public static final FunctionSignature FNS_EMPTY = functionSignature( + Fn.EMPTY.fname, + "Returns true if the supplied map contains no entries.", + returns(Type.BOOLEAN, Cardinality.EXACTLY_ONE), + PARAM_INPUT_MAP + ); + public static final FunctionSignature FNS_ITEMS = functionSignature( + Fn.ITEMS.fname, + "Returns a sequence containing all the values present in a map, in entry order.", + RETURN_OPT_MANY_ITEM, + PARAM_INPUT_MAP + ); + public static final FunctionSignature FNS_ENTRIES = functionSignature( + Fn.ENTRIES.fname, + "Returns the entries of a map as a sequence of singleton maps.", + returns(Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of singleton maps"), + PARAM_INPUT_MAP + ); + public static final FunctionSignature FNS_KEYS_WHERE = functionSignature( + Fn.KEYS_WHERE.fname, + "Returns the keys in a map for which the supplied predicate function returns true.", + returns(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE), + PARAM_INPUT_MAP, + funParam( + "predicate", + params( + param("key", Type.ANY_ATOMIC_TYPE, "the key"), + optManyParam("value", Type.ITEM, "the value") + ), + returns(Type.BOOLEAN), + "The predicate function" + ) + ); + public static final FunctionSignature FNS_FILTER = functionSignature( + Fn.FILTER.fname, + "Returns a map containing those entries from the input map for which the supplied predicate returns true.", + RETURN_MAP, + PARAM_INPUT_MAP, + funParam( + "predicate", + params( + param("key", Type.ANY_ATOMIC_TYPE, "the key"), + optManyParam("value", Type.ITEM, "the value") + ), + returns(Type.BOOLEAN), + "The predicate function" + ) + ); + public static final FunctionSignature[] FS_BUILD = { + functionSignature( + Fn.BUILD.fname, + "Constructs a map from a sequence by applying key and value functions.", + RETURN_MAP, + optManyParam("input", Type.ITEM, "The input sequence") + ), + functionSignature( + Fn.BUILD.fname, + "Constructs a map from a sequence by applying key and value functions.", + RETURN_MAP, + optManyParam("input", Type.ITEM, "The input sequence"), + funParam("key", + params(optManyParam("item", Type.ITEM, "the current item")), + returns(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE), + "The key function" + ) + ), + functionSignature( + Fn.BUILD.fname, + "Constructs a map from a sequence by applying key and value functions.", + RETURN_MAP, + optManyParam("input", Type.ITEM, "The input sequence"), + funParam("key", + params(optManyParam("item", Type.ITEM, "the current item")), + returns(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE), + "The key function" + ), + funParam("value", + params(optManyParam("item", Type.ITEM, "the current item")), + RETURN_OPT_MANY_ITEM, + "The value function" + ) + ), + functionSignature( + Fn.BUILD.fname, + "Constructs a map from a sequence by applying key and value functions.", + RETURN_MAP, + optManyParam("input", Type.ITEM, "The input sequence"), + funParam("key", + params(optManyParam("item", Type.ITEM, "the current item")), + returns(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE), + "The key function" + ), + funParam("value", + params(optManyParam("item", Type.ITEM, "the current item")), + RETURN_OPT_MANY_ITEM, + "The value function" + ), + param("options", Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "Options map with duplicates handling") + ) + }; + private AnalyzeContextInfo cachedContextInfo; public MapFunction(final XQueryContext context, final FunctionSignature signature) { @@ -239,6 +341,12 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro case ENTRY -> entry(args); case REMOVE -> remove(args); case FOR_EACH -> forEach(args); + case EMPTY -> empty(args); + case ITEMS -> items(args); + case ENTRIES -> entries(args); + case KEYS_WHERE -> keysWhere(args); + case FILTER -> filter(args); + case BUILD -> build(args); }; } @@ -347,6 +455,191 @@ private Sequence forEach(final Sequence[] args) throws XPathException { } } + // --- XQuery 4.0 map function implementations --- + + private Sequence empty(final Sequence[] args) { + final AbstractMapType map = (AbstractMapType) args[0].itemAt(0); + return BooleanValue.valueOf(map.size() == 0); + } + + private Sequence items(final Sequence[] args) throws XPathException { + final AbstractMapType map = (AbstractMapType) args[0].itemAt(0); + final ArrayListValueSequence result = new ArrayListValueSequence(map.size()); + for (final IEntry entry : map) { + result.addAll(entry.value()); + } + return result; + } + + private Sequence entries(final Sequence[] args) { + final AbstractMapType map = (AbstractMapType) args[0].itemAt(0); + final ArrayListValueSequence result = new ArrayListValueSequence(map.size()); + for (final IEntry entry : map) { + result.add(new SingleKeyMapType(this, this.context, null, entry.key(), entry.value())); + } + return result; + } + + private Sequence keysWhere(final Sequence[] args) throws XPathException { + final AbstractMapType map = (AbstractMapType) args[0].itemAt(0); + try (final FunctionReference ref = (FunctionReference) args[1].itemAt(0)) { + ref.analyze(cachedContextInfo); + final ArrayListValueSequence result = new ArrayListValueSequence(); + for (final IEntry entry : map) { + final Sequence predicateResult = ref.evalFunction(null, null, + new Sequence[]{ entry.key(), entry.value() }); + if (!predicateResult.isEmpty() && predicateResult.effectiveBooleanValue()) { + result.add(entry.key()); + } + } + return result; + } + } + + private Sequence filter(final Sequence[] args) throws XPathException { + final AbstractMapType map = (AbstractMapType) args[0].itemAt(0); + try (final FunctionReference ref = (FunctionReference) args[1].itemAt(0)) { + ref.analyze(cachedContextInfo); + final int arity = ref.getSignature().getArgumentCount(); + AbstractMapType result = new MapType(this, this.context); + int position = 1; + for (final IEntry entry : map) { + final Sequence predicateResult; + if (arity >= 3) { + predicateResult = ref.evalFunction(null, null, + new Sequence[]{ entry.key(), entry.value(), + new IntegerValue(this, position, Type.INTEGER) }); + } else { + predicateResult = ref.evalFunction(null, null, + new Sequence[]{ entry.key(), entry.value() }); + } + if (!predicateResult.isEmpty() && predicateResult.effectiveBooleanValue()) { + result = result.put(entry.key(), entry.value()); + } + position++; + } + return result; + } + } + + private Sequence build(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + + // $key function -- defaults to identity + final FunctionReference keyFn = (args.length >= 2 && !args[1].isEmpty()) + ? (FunctionReference) args[1].itemAt(0) : null; + // $value function -- defaults to identity + final FunctionReference valueFn = (args.length >= 3 && !args[2].isEmpty()) + ? (FunctionReference) args[2].itemAt(0) : null; + + // Parse options + DuplicateMergeStrategy mergeDuplicates = DuplicateMergeStrategy.COMBINE; + FunctionReference duplicatesFn = null; + if (args.length >= 4 && !args[3].isEmpty()) { + final AbstractMapType options = (AbstractMapType) args[3].itemAt(0); + final StringValue dupKey = new StringValue(this, "duplicates"); + if (options.contains(dupKey)) { + final Sequence dupValue = options.get(dupKey); + if (dupValue.getItemCount() == 1 && Type.subTypeOf(dupValue.itemAt(0).getType(), Type.FUNCTION)) { + duplicatesFn = (FunctionReference) dupValue.itemAt(0); + duplicatesFn.analyze(cachedContextInfo); + mergeDuplicates = null; // use custom function + } else { + final String dupStr = dupValue.getStringValue(); + mergeDuplicates = DuplicateMergeStrategy.get(dupStr); + if (mergeDuplicates == null) { + throw new XPathException(this, ErrorCodes.FOJS0005, + "value for duplicates key was not recognised: " + dupStr); + } + } + } + } + + if (keyFn != null) { + keyFn.analyze(cachedContextInfo); + } + if (valueFn != null) { + valueFn.analyze(cachedContextInfo); + } + + try { + AbstractMapType result = new MapType(this, this.context); + int position = 1; + for (int i = 0; i < input.getItemCount(); i++) { + final Item item = input.itemAt(i); + final Sequence itemSeq = item.toSequence(); + final IntegerValue posValue = new IntegerValue(this, position, Type.INTEGER); + + // Compute key(s) + final Sequence keyResult; + if (keyFn != null) { + final int keyArity = keyFn.getSignature().getArgumentCount(); + keyResult = keyArity >= 2 + ? keyFn.evalFunction(null, null, new Sequence[]{ itemSeq, posValue }) + : keyFn.evalFunction(null, null, new Sequence[]{ itemSeq }); + } else { + keyResult = itemSeq; + } + + // Compute value + final Sequence valueResult; + if (valueFn != null) { + final int valArity = valueFn.getSignature().getArgumentCount(); + valueResult = valArity >= 2 + ? valueFn.evalFunction(null, null, new Sequence[]{ itemSeq, posValue }) + : valueFn.evalFunction(null, null, new Sequence[]{ itemSeq }); + } else { + valueResult = itemSeq; + } + + // For each key, add/merge into the result map + for (int k = 0; k < keyResult.getItemCount(); k++) { + final AtomicValue key = keyResult.itemAt(k).atomize(); + if (result.contains(key)) { + // Handle duplicate + final Sequence existingValue = result.get(key); + final Sequence mergedValue; + if (duplicatesFn != null) { + mergedValue = duplicatesFn.evalFunction(null, null, + new Sequence[]{ existingValue, valueResult }); + } else if (mergeDuplicates == DuplicateMergeStrategy.COMBINE) { + final ValueSequence combined = new ValueSequence(existingValue); + combined.addAll(valueResult); + mergedValue = combined; + } else if (mergeDuplicates == DuplicateMergeStrategy.USE_FIRST) { + mergedValue = existingValue; + } else if (mergeDuplicates == DuplicateMergeStrategy.USE_LAST) { + mergedValue = valueResult; + } else if (mergeDuplicates == DuplicateMergeStrategy.USE_ANY) { + mergedValue = existingValue; + } else if (mergeDuplicates == DuplicateMergeStrategy.REJECT) { + throw new XPathException(this, ErrorCodes.FOJS0003, + "Duplicate key in map:build: " + key.getStringValue()); + } else { + mergedValue = valueResult; + } + result = result.put(key, mergedValue); + } else { + result = result.put(key, valueResult); + } + } + + position++; + } + return result; + } finally { + if (keyFn != null) { + keyFn.close(); + } + if (valueFn != null) { + valueFn.close(); + } + if (duplicatesFn != null) { + duplicatesFn.close(); + } + } + } + /* * Reject duplicate items */ @@ -443,7 +736,13 @@ private enum Fn { KEYS("keys"), REMOVE("remove"), FOR_EACH("for-each"), - FIND("find"); + FIND("find"), + EMPTY("empty"), + ITEMS("items"), + ENTRIES("entries"), + KEYS_WHERE("keys-where"), + FILTER("filter"), + BUILD("build"); final static Map fnMap = new HashMap<>(); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/map/MapModule.java b/exist-core/src/main/java/org/exist/xquery/functions/map/MapModule.java index 0eec5b52553..e16982a32a1 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/map/MapModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/map/MapModule.java @@ -55,7 +55,17 @@ public class MapModule extends AbstractInternalModule { MapFunction.PUT, MapFunction.ENTRY, MapFunction.REMOVE, - MapFunction.FOR_EACH + MapFunction.FOR_EACH, + // XQuery 4.0 map functions + MapFunction.FNS_EMPTY, + MapFunction.FNS_ITEMS, + MapFunction.FNS_ENTRIES, + MapFunction.FNS_KEYS_WHERE, + MapFunction.FNS_FILTER, + MapFunction.FS_BUILD[0], + MapFunction.FS_BUILD[1], + MapFunction.FS_BUILD[2], + MapFunction.FS_BUILD[3] ); public MapModule(Map> parameters) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/map/MapType.java b/exist-core/src/main/java/org/exist/xquery/functions/map/MapType.java index 2d635921100..9ec23bda3fe 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/map/MapType.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/map/MapType.java @@ -37,12 +37,15 @@ import io.lacuna.bifurcan.IEntry; import io.lacuna.bifurcan.IMap; import io.lacuna.bifurcan.LinearMap; +import io.lacuna.bifurcan.Maps; import io.lacuna.bifurcan.Map; import org.exist.xquery.*; import org.exist.xquery.value.*; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.Optional; import java.util.function.BinaryOperator; import java.util.function.ToLongFunction; @@ -60,6 +63,10 @@ public class MapType extends AbstractMapType { // TODO(AR) future potential optimisation... could the class member `map` remain `linear` ? private IMap map; + // XQ4: insertion-order tracking for ordered maps + @Nullable + private List insertionOrder; + /** * The type of the keys in the map, * if not all keys have the same type @@ -206,7 +213,22 @@ public AbstractMapType merge(final Iterable others) { } // return an immutable map - return new MapType(getExpression(), context, newMap.forked(), prevType); + final MapType result = new MapType(getExpression(), context, newMap.forked(), prevType); + // Propagate insertion order from this map + others + if (insertionOrder != null) { + final List newOrder = new ArrayList<>(insertionOrder); + for (final AbstractMapType other : others) { + if (other instanceof MapType otherMap && otherMap.insertionOrder != null) { + for (final AtomicValue key : otherMap.insertionOrder) { + if (!newOrder.contains(key)) { + newOrder.add(key); + } + } + } + } + result.setInsertionOrder(newOrder); + } + return result; } @Override @@ -263,7 +285,15 @@ public Sequence get(AtomicValue key) { @Override public AbstractMapType put(final AtomicValue key, final Sequence value) { final IMap newMap = map.put(key, value); - return new MapType(getExpression(), this.context, newMap, keyType == key.getType() ? keyType : MIXED_KEY_TYPES); + final MapType result = new MapType(getExpression(), this.context, newMap, keyType == key.getType() ? keyType : MIXED_KEY_TYPES); + if (insertionOrder != null) { + final List newOrder = new ArrayList<>(insertionOrder); + if (!map.contains(key)) { + newOrder.add(key); + } + result.setInsertionOrder(newOrder); + } + return result; } @Override @@ -298,6 +328,13 @@ public boolean contains(final Item item) { @Override public Sequence keys() { + if (insertionOrder != null) { + final ArrayListValueSequence seq = new ArrayListValueSequence(insertionOrder.size()); + for (final AtomicValue key : insertionOrder) { + seq.add(key); + } + return seq; + } final ArrayListValueSequence seq = new ArrayListValueSequence((int)map.size()); for (final AtomicValue key: map.keys()) { seq.add(key); @@ -305,6 +342,13 @@ public Sequence keys() { return seq; } + /** + * Set insertion order tracking for XQ4 ordered maps. + */ + public void setInsertionOrder(@Nullable final List order) { + this.insertionOrder = order; + } + public AbstractMapType remove(final AtomicValue[] keysAtomicValues) { // create a transient map @@ -315,7 +359,15 @@ public AbstractMapType remove(final AtomicValue[] keysAtomicValues) { } // return an immutable map - return new MapType(getExpression(), context, newMap.forked(), keyType); + final MapType result = new MapType(getExpression(), context, newMap.forked(), keyType); + if (insertionOrder != null) { + final List newOrder = new ArrayList<>(insertionOrder); + for (final AtomicValue key : keysAtomicValues) { + newOrder.remove(key); + } + result.setInsertionOrder(newOrder); + } + return result; } @Override @@ -325,6 +377,11 @@ public int size() { @Override public Iterator> iterator() { + if (insertionOrder != null) { + return insertionOrder.stream() + .map(key -> (IEntry) new Maps.Entry<>(key, map.get(key, Sequence.EMPTY_SEQUENCE))) + .iterator(); + } return map.iterator(); } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/map/SingleKeyMapType.java b/exist-core/src/main/java/org/exist/xquery/functions/map/SingleKeyMapType.java index 338cfd58ae8..9238c8b6eaf 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/map/SingleKeyMapType.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/map/SingleKeyMapType.java @@ -90,11 +90,16 @@ public AbstractMapType merge(final Iterable others, final Binar @Override public AbstractMapType put(final AtomicValue key, final Sequence value) { final IMap map = newLinearMap(collator); + final java.util.List keyOrder = new java.util.ArrayList<>(2); int keyType = UNKNOWN_KEY_TYPE; if (this.key != null) { map.put(this.key, this.value); + keyOrder.add(this.key); keyType = this.key.getType(); } + if (!map.contains(key)) { + keyOrder.add(key); + } map.put(key, value); if (keyType != key.getType()) { keyType = MIXED_KEY_TYPES; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/math/MathModule.java b/exist-core/src/main/java/org/exist/xquery/functions/math/MathModule.java index be6211ee905..75d665def8a 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/math/MathModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/math/MathModule.java @@ -53,8 +53,12 @@ public class MathModule extends AbstractInternalModule { new FunctionDef(OneParamFunctions.FNS_SIN, OneParamFunctions.class), new FunctionDef(OneParamFunctions.FNS_SQRT, OneParamFunctions.class), new FunctionDef(OneParamFunctions.FNS_TAN, OneParamFunctions.class), - + new FunctionDef(OneParamFunctions.FNS_COSH, OneParamFunctions.class), + new FunctionDef(OneParamFunctions.FNS_SINH, OneParamFunctions.class), + new FunctionDef(OneParamFunctions.FNS_TANH, OneParamFunctions.class), + new FunctionDef(NoParamFunctions.FNS_PI, NoParamFunctions.class), + new FunctionDef(NoParamFunctions.FNS_E, NoParamFunctions.class), new FunctionDef(TwoParamFunctions.FNS_ATAN2, TwoParamFunctions.class), new FunctionDef(TwoParamFunctions.FNS_POW, TwoParamFunctions.class) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/math/NoParamFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/math/NoParamFunctions.java index 68417874a90..6ce93ed07b4 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/math/NoParamFunctions.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/math/NoParamFunctions.java @@ -43,6 +43,7 @@ public class NoParamFunctions extends BasicFunction { //private static final Logger logger = LogManager.getLogger(NoParamFunctions.class); public static final String PI = "pi"; + public static final String E = "e"; public final static FunctionSignature FNS_PI = new FunctionSignature( new QName(PI, MathModule.NAMESPACE_URI, MathModule.PREFIX), @@ -51,6 +52,13 @@ public class NoParamFunctions extends BasicFunction { new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the value of pi") ); + public final static FunctionSignature FNS_E = new FunctionSignature( + new QName(E, MathModule.NAMESPACE_URI, MathModule.PREFIX), + "Returns the value of e (Euler's number, approximately 2.71828).", + null, + new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the value of e") + ); + public NoParamFunctions(XQueryContext context, FunctionSignature signature) { super(context, signature); } @@ -68,7 +76,8 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce final String functionName = getSignature().getName().getLocalPart(); if(PI.equals(functionName)) { result=new DoubleValue(this, Math.PI); - + } else if(E.equals(functionName)) { + result=new DoubleValue(this, Math.E); } else { throw new XPathException(this, "Function "+functionName+" not found."); } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/math/OneParamFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/math/OneParamFunctions.java index ca3f330249a..dea6812eed7 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/math/OneParamFunctions.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/math/OneParamFunctions.java @@ -47,84 +47,108 @@ public class OneParamFunctions extends BasicFunction { public static final String SIN = "sin"; public static final String SQRT = "sqrt"; public static final String TAN = "tan"; + public static final String COSH = "cosh"; + public static final String SINH = "sinh"; + public static final String TANH = "tanh"; public final static FunctionSignature FNS_ACOS = new FunctionSignature( new QName(ACOS, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the arc cosine of the argument, the result being in the range zero to +π radians.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the result") ); public final static FunctionSignature FNS_ASIN = new FunctionSignature( new QName(ASIN, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the arc sine of the argument, the result being in the range -π/2 to +π/2 radians.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "result") ); public final static FunctionSignature FNS_ATAN = new FunctionSignature( new QName(ATAN, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the arc tangent of the argument, the result being in the range -π/2 to +π/2 radians.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the result") ); public final static FunctionSignature FNS_COS = new FunctionSignature( new QName(COS, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the cosine of the argument, expressed in radians.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("radians", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the cosine") ); public final static FunctionSignature FNS_EXP = new FunctionSignature( new QName(EXP, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Calculates e (the Euler Constant) raised to the power of $arg", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "e (the Euler Constant) raised to the power of a value or expression") ); public final static FunctionSignature FNS_EXP10 = new FunctionSignature( // NEW new QName(EXP10, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Calculates 10 raised to the power of $arg", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "e (the Euler Constant) raised to the power of a value or expression") ); public final static FunctionSignature FNS_LOG = new FunctionSignature( new QName(LOG, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the natural logarithm of the argument.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the log") ); public final static FunctionSignature FNS_LOG10 = new FunctionSignature( // NEW new QName(LOG10, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the base-ten logarithm of the argument.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the log") ); public final static FunctionSignature FNS_SIN = new FunctionSignature( new QName(SIN, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the sine of the argument, expressed in radians.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("radians", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the sine") ); public final static FunctionSignature FNS_SQRT = new FunctionSignature( new QName(SQRT, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the non-negative square root of the argument.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, + new SequenceType[]{new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input number")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the square root of $x") ); public final static FunctionSignature FNS_TAN = new FunctionSignature( new QName(TAN, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the tangent of the argument, expressed in radians.", - new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The radians")}, + new SequenceType[]{new FunctionParameterSequenceType("radians", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The radians")}, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the tangent") ); + public final static FunctionSignature FNS_COSH = new FunctionSignature( + new QName(COSH, MathModule.NAMESPACE_URI, MathModule.PREFIX), + "Returns the hyperbolic cosine of the argument.", + new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input value")}, + new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the hyperbolic cosine") + ); + + public final static FunctionSignature FNS_SINH = new FunctionSignature( + new QName(SINH, MathModule.NAMESPACE_URI, MathModule.PREFIX), + "Returns the hyperbolic sine of the argument.", + new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input value")}, + new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the hyperbolic sine") + ); + + public final static FunctionSignature FNS_TANH = new FunctionSignature( + new QName(TANH, MathModule.NAMESPACE_URI, MathModule.PREFIX), + "Returns the hyperbolic tangent of the argument.", + new SequenceType[]{new FunctionParameterSequenceType("arg", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The input value")}, + new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the hyperbolic tangent") + ); + public OneParamFunctions(XQueryContext context, FunctionSignature signature) { super(context, signature); } @@ -156,6 +180,9 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce case SIN -> Math.sin(value.getDouble()); case SQRT -> Math.sqrt(value.getDouble()); case TAN -> Math.tan(value.getDouble()); + case COSH -> Math.cosh(value.getDouble()); + case SINH -> Math.sinh(value.getDouble()); + case TANH -> Math.tanh(value.getDouble()); case null -> throw new XPathException(this, ERROR, "Function " + functionName + " not found."); default -> 0; }; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/math/TwoParamFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/math/TwoParamFunctions.java index 8e45fd0d08c..18fb3737ce1 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/math/TwoParamFunctions.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/math/TwoParamFunctions.java @@ -50,8 +50,8 @@ public class TwoParamFunctions extends BasicFunction { new QName(POW, MathModule.NAMESPACE_URI, MathModule.PREFIX), "Returns the result of raising the first argument to the power of the second.", new SequenceType[]{ - new FunctionParameterSequenceType("value", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The value"), - new FunctionParameterSequenceType("power", Type.NUMERIC, Cardinality.EXACTLY_ONE, "The power to raise the value to") + new FunctionParameterSequenceType("x", Type.DOUBLE, Cardinality.ZERO_OR_ONE, "The value"), + new FunctionParameterSequenceType("y", Type.NUMERIC, Cardinality.EXACTLY_ONE, "The power to raise the value to") }, new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.ZERO_OR_ONE, "the result") ); diff --git a/exist-core/src/main/java/org/exist/xquery/regex/RegexUtil.java b/exist-core/src/main/java/org/exist/xquery/regex/RegexUtil.java index d54ca496c01..31c372db37f 100644 --- a/exist-core/src/main/java/org/exist/xquery/regex/RegexUtil.java +++ b/exist-core/src/main/java/org/exist/xquery/regex/RegexUtil.java @@ -155,7 +155,36 @@ public static String translateRegexp(final Expression context, final String patt final List warnings = new ArrayList<>(); return JDK15RegexTranslator.translate(pattern, options, flagbits, warnings); } catch (final RegexSyntaxException e) { + // Fallback: if the pattern uses \p{Is} Unicode block names that + // the bundled Saxon regex translator doesn't recognize, convert them to + // Java's \p{In} syntax and try compiling directly. + if (pattern.contains("\\p{Is") || pattern.contains("\\P{Is")) { + final String javaPattern = convertUnicodeBlockNames(pattern); + try { + int flags = Pattern.UNICODE_CHARACTER_CLASS; + if (ignoreWhitespace) { + flags |= Pattern.COMMENTS; + } + if (caseBlind) { + flags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; + } + Pattern.compile(javaPattern, flags); + return javaPattern; + } catch (final java.util.regex.PatternSyntaxException ignored) { + // fallback failed, throw original error + } + } throw new XPathException(context, ErrorCodes.FORX0002, "Conversion from XPath F&O 3.0 regular expression syntax to Java regular expression syntax failed: " + e.getMessage(), new StringValue(pattern), e); } } + + /** + * Convert XML Schema/XPath \p{Is} and \P{Is} Unicode block + * property escapes to Java's \p{In} and \P{In} syntax. + */ + private static String convertUnicodeBlockNames(final String pattern) { + return pattern + .replaceAll("\\\\p\\{Is([^}]+)}", "\\\\p{In$1}") + .replaceAll("\\\\P\\{Is([^}]+)}", "\\\\P{In$1}"); + } } diff --git a/exist-core/src/main/java/org/exist/xquery/util/DocUtils.java b/exist-core/src/main/java/org/exist/xquery/util/DocUtils.java index 22e3e15721c..be77f2c4a94 100644 --- a/exist-core/src/main/java/org/exist/xquery/util/DocUtils.java +++ b/exist-core/src/main/java/org/exist/xquery/util/DocUtils.java @@ -99,10 +99,19 @@ private static Sequence getDocumentByPath(final XQueryContext context, final Str Sequence doc = getFromDynamicallyAvailableDocuments(context, path, expression); if (doc == null) { if (PTN_PROTOCOL_PREFIX.matcher(path).matches() && !path.startsWith("xmldb:")) { - /* URL */ - doc = getDocumentByPathFromURL(context, path, expression); + /* URL — use SourceFactory (has security checks) */ + doc = getDocumentByPathFromURL(context, path, expression, false); + } else if (!PTN_PROTOCOL_PREFIX.matcher(path).matches()) { + // Relative URI: resolve against static base URI per XQuery spec §2.1.2 + final String resolved = resolveAgainstBaseUri(context, path); + if (resolved != null && resolved.startsWith("file:")) { + doc = getDocumentByPathFromURL(context, resolved, expression, true); + } else { + /* Database documents */ + doc = getDocumentByPathFromDB(context, path, expression); + } } else { - /* Database documents */ + /* Database documents (xmldb: prefix) */ doc = getDocumentByPathFromDB(context, path, expression); } } @@ -110,6 +119,29 @@ private static Sequence getDocumentByPath(final XQueryContext context, final Str return doc; } + /** + * Resolve a relative URI against the static base URI. + * + * @return the resolved URI string, or null if resolution is not possible + */ + private static @Nullable String resolveAgainstBaseUri(final XQueryContext context, final String relativePath) { + try { + final AnyURIValue baseXdmUri = context.getBaseURI(); + if (baseXdmUri != null && !baseXdmUri.equals(AnyURIValue.EMPTY_URI)) { + String baseStr = baseXdmUri.toURI().toString(); + // Strip filename to get directory URI + final int lastSlash = baseStr.lastIndexOf('/'); + if (lastSlash >= 0) { + baseStr = baseStr.substring(0, lastSlash + 1); + } + return new URI(baseStr).resolve(relativePath).toString(); + } + } catch (final URISyntaxException | XPathException e) { + // fall through + } + return null; + } + private static @Nullable Sequence getFromDynamicallyAvailableDocuments(final XQueryContext context, final String path) throws XPathException { return getFromDynamicallyAvailableDocuments(context, path, null); } @@ -134,11 +166,28 @@ private static Sequence getDocumentByPath(final XQueryContext context, final Str } private static Sequence getDocumentByPathFromURL(final XQueryContext context, final String path) throws XPathException, PermissionDeniedException { - return getDocumentByPathFromURL(context, path, null); + return getDocumentByPathFromURL(context, path, null, false); } - private static Sequence getDocumentByPathFromURL(final XQueryContext context, final String path, final Expression expression) throws XPathException, PermissionDeniedException { + private static Sequence getDocumentByPathFromURL(final XQueryContext context, final String path, final Expression expression, final boolean resolvedFromBaseUri) throws XPathException, PermissionDeniedException { try { + // Only use direct file: access for URIs resolved from a relative path + // against a file: base URI. Absolute file: URIs go through SourceFactory + // which enforces security checks (e.g., blocking file:///etc/passwd). + if (resolvedFromBaseUri && path.startsWith("file:")) { + final String filePath = path.replaceFirst("^file:(?://[^/]*)?", ""); + final java.nio.file.Path nioPath = java.nio.file.Paths.get(filePath); + if (java.nio.file.Files.isReadable(nioPath)) { + try (final java.io.InputStream fis = java.nio.file.Files.newInputStream(nioPath)) { + final org.exist.dom.memtree.DocumentImpl memtreeDoc = parse( + context.getBroker().getBrokerPool(), context, fis, expression); + memtreeDoc.setDocumentURI(path); + return memtreeDoc; + } + } + return Sequence.EMPTY_SEQUENCE; + } + final Source source = SourceFactory.getSource(context.getBroker(), "", path, false); if (source == null) { return Sequence.EMPTY_SEQUENCE; diff --git a/exist-core/src/main/java/org/exist/xquery/util/NumberFormatter_en.java b/exist-core/src/main/java/org/exist/xquery/util/NumberFormatter_en.java index bf51c0b26f1..47a8c337ef5 100644 --- a/exist-core/src/main/java/org/exist/xquery/util/NumberFormatter_en.java +++ b/exist-core/src/main/java/org/exist/xquery/util/NumberFormatter_en.java @@ -36,7 +36,8 @@ public NumberFormatter_en(final Locale locale) { @Override public String getOrdinalSuffix(long number) { - if (number > 10 && number < 20) + final long lastTwo = number % 100; + if (lastTwo > 10 && lastTwo < 20) {return "th";} final long mod = number % 10; if (mod == 1) diff --git a/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java b/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java index 9649e835344..0bf4092619a 100644 --- a/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java +++ b/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java @@ -126,16 +126,25 @@ public interface ParameterConvention { public enum W3CParameterConvention implements ParameterConvention { ALLOW_DUPLICATE_NAMES("allow-duplicate-names", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE), BYTE_ORDER_MARK("byte-order-mark", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE), + CANONICAL(EXistOutputKeys.CANONICAL, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE), CDATA_SECTION_ELEMENTS(OutputKeys.CDATA_SECTION_ELEMENTS, Type.QNAME, Cardinality.ZERO_OR_MORE, Sequence.EMPTY_SEQUENCE), DOCTYPE_PUBLIC(OutputKeys.DOCTYPE_PUBLIC, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), //default: () means "absent" DOCTYPE_SYSTEM(OutputKeys.DOCTYPE_SYSTEM, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), //default: () means "absent" ENCODING(OutputKeys.ENCODING, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue(UTF_8.name())), + ESCAPE_SOLIDUS(EXistOutputKeys.ESCAPE_SOLIDUS, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE), ESCAPE_URI_ATTRIBUTES("escape-uri-attributes", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE), HTML_VERSION(EXistOutputKeys.HTML_VERSION, Type.DECIMAL, Cardinality.ZERO_OR_ONE, new DecimalValue(5)), INCLUDE_CONTENT_TYPE("include-content-type", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE), INDENT(OutputKeys.INDENT, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE), ITEM_SEPARATOR(EXistOutputKeys.ITEM_SEPARATOR, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), //default: () means "absent" + JSON_LINES(EXistOutputKeys.JSON_LINES, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE), JSON_NODE_OUTPUT_METHOD(EXistOutputKeys.JSON_NODE_OUTPUT_METHOD, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("xml")), + // CSV serialization parameters (vendor extension, modeled on BaseX) + CSV_FIELD_DELIMITER(EXistOutputKeys.CSV_FIELD_DELIMITER, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue(",")), + CSV_ROW_DELIMITER(EXistOutputKeys.CSV_ROW_DELIMITER, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("\n")), + CSV_QUOTE_CHARACTER(EXistOutputKeys.CSV_QUOTE_CHARACTER, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("\"")), + CSV_HEADER(EXistOutputKeys.CSV_HEADER, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE), + CSV_QUOTES(EXistOutputKeys.CSV_QUOTES, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE), MEDIA_TYPE(OutputKeys.MEDIA_TYPE, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), // default: a media type suitable for the chosen method METHOD(OutputKeys.METHOD, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("xml")), NORMALIZATION_FORM("normalization-form", Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("none")), @@ -286,13 +295,21 @@ private static void readStartElement(final Expression parent, final XMLStreamRea final javax.xml.namespace.QName key = reader.getName(); final String local = key.getLocalPart(); final String prefix = key.getPrefix(); + final String nsURI = key.getNamespaceURI(); if (properties.containsKey(local)) { throw new XPathException(parent, FnModule.SEPM0019, "serialization parameter specified twice: " + key); } - if (prefix.equals(OUTPUT_NAMESPACE) && !W3CParameterConventionKeys.contains(local)) { + if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(nsURI) && !W3CParameterConventionKeys.contains(local)) { throw new XPathException(ErrorCodes.SEPM0017, "serialization parameter not recognized: " + key); } + // Accept eXist-specific parameters from the exist: namespace (issue #3446) + // These include expand-xincludes, highlight-matches, process-xsl-pi, add-exist-id, jsonp, etc. + if (Namespaces.EXIST_NS.equals(nsURI)) { + readSerializationProperty(reader, local, properties); + return; + } + readSerializationProperty(reader, local, properties); } @@ -413,13 +430,21 @@ public static void setProperty(final String key, final String value, final Prope qnamesValue.append(' '); } - final String[] prefixAndLocal = qnameStr.split(":"); - if (prefixAndLocal.length == 1) { - qnamesValue.append("{}").append(prefixAndLocal[0]); - } else if (prefixAndLocal.length == 2) { - final String prefix = prefixAndLocal[0]; - final String ns = prefixToNs.apply(prefix); - qnamesValue.append('{').append(ns).append('}').append(prefixAndLocal[1]); + // Handle Q{ns}local (URIQualifiedName) — pass through as {ns}local + if (qnameStr.startsWith("Q{") && qnameStr.contains("}")) { + final int closeBrace = qnameStr.indexOf('}'); + final String ns = qnameStr.substring(2, closeBrace); + final String local = qnameStr.substring(closeBrace + 1); + qnamesValue.append('{').append(ns).append('}').append(local); + } else { + final String[] prefixAndLocal = qnameStr.split(":"); + if (prefixAndLocal.length == 1) { + qnamesValue.append("{}").append(prefixAndLocal[0]); + } else if (prefixAndLocal.length == 2) { + final String prefix = prefixAndLocal[0]; + final String ns = prefixToNs.apply(prefix); + qnamesValue.append('{').append(ns).append('}').append(prefixAndLocal[1]); + } } } @@ -502,6 +527,7 @@ private static Sequence getDefaultMediaType(final Sequence providedMethod) throw case "jsonp" -> new StringValue("application/javascript"); case "html" -> new StringValue("text/html"); case "adaptive", "text" -> new StringValue("text/plain"); + case "csv" -> new StringValue("text/csv"); case "binary" -> new StringValue("application/octet-stream"); default -> throw new UnsupportedOperationException("Unrecognised serialization method: " + method); }; @@ -520,7 +546,8 @@ private static boolean checkTypes(final ParameterConvention parameterConventi final SequenceIterator iterator = sequence.iterate(); while (iterator.hasNext()) { final Item item = iterator.nextItem(); - if (parameterConvention.getType() != item.getType()) { + // Use subtype check: xs:integer is a valid xs:decimal, xs:string subtypes are valid xs:string, etc. + if (!Type.subTypeOf(item.getType(), parameterConvention.getType())) { return false; } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/AbstractDateTimeValue.java b/exist-core/src/main/java/org/exist/xquery/value/AbstractDateTimeValue.java index 4b4f36150e8..9e695cab84d 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/AbstractDateTimeValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/AbstractDateTimeValue.java @@ -186,6 +186,29 @@ protected XMLGregorianCalendar getImplicitCalendar() { implicitCalendar.setMonth(12); implicitCalendar.setDay(31); break; + case Type.G_YEAR: + implicitCalendar.setMonth(1); + implicitCalendar.setDay(1); + implicitCalendar.setTime(0, 0, 0); + break; + case Type.G_YEAR_MONTH: + implicitCalendar.setDay(1); + implicitCalendar.setTime(0, 0, 0); + break; + case Type.G_MONTH: + implicitCalendar.setYear(1972); + implicitCalendar.setDay(1); + implicitCalendar.setTime(0, 0, 0); + break; + case Type.G_MONTH_DAY: + implicitCalendar.setYear(1972); + implicitCalendar.setTime(0, 0, 0); + break; + case Type.G_DAY: + implicitCalendar.setYear(1972); + implicitCalendar.setMonth(1); + implicitCalendar.setTime(0, 0, 0); + break; default: } implicitCalendar = implicitCalendar.normalize(); // the comparison routines will normalize it anyway, just do it once here diff --git a/exist-core/src/main/java/org/exist/xquery/value/AnyURIValue.java b/exist-core/src/main/java/org/exist/xquery/value/AnyURIValue.java index e25227af336..af144361828 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/AnyURIValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/AnyURIValue.java @@ -282,7 +282,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "Type error: cannot cast xs:anyURI to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/AtomicValueComparator.java b/exist-core/src/main/java/org/exist/xquery/value/AtomicValueComparator.java index 1eda756ca15..48059df743e 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/AtomicValueComparator.java +++ b/exist-core/src/main/java/org/exist/xquery/value/AtomicValueComparator.java @@ -74,7 +74,9 @@ public int compare(final AtomicValue o1, final AtomicValue o2) { return o1.compareTo(collator, o2); } catch (final XPathException e) { LOG.error(e.getMessage(), e); - throw new ClassCastException(e.getMessage()); + final ClassCastException cce = new ClassCastException(e.getMessage()); + cce.initCause(e); + throw cce; } } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/Base64BinaryValueType.java b/exist-core/src/main/java/org/exist/xquery/value/Base64BinaryValueType.java index 3e96607b5a8..9f6c1027a77 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/Base64BinaryValueType.java +++ b/exist-core/src/main/java/org/exist/xquery/value/Base64BinaryValueType.java @@ -22,6 +22,7 @@ package org.exist.xquery.value; import org.exist.util.io.Base64OutputStream; +import org.exist.xquery.ErrorCodes; import org.exist.xquery.Expression; import org.exist.xquery.XPathException; @@ -50,7 +51,7 @@ private Matcher getMatcher(final String toMatch) { @Override public void verifyString(String str) throws XPathException { if (!getMatcher(str).matches()) { - throw new XPathException((Expression) null, "FORG0001: Invalid base64 data"); + throw new XPathException((Expression) null, ErrorCodes.FORG0001, "Invalid base64 data"); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/BinaryValue.java b/exist-core/src/main/java/org/exist/xquery/value/BinaryValue.java index bff4a04ad58..dcf98ae50f3 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/BinaryValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/BinaryValue.java @@ -195,7 +195,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { //TODO still needed? Added trim() since it looks like a new line character is added new StringValue(getExpression(), getStringValue()); default -> - throw new XPathException(getExpression(), ErrorCodes.FORG0001, "cannot convert " + Type.getTypeName(getType()) + " to " + Type.getTypeName(requiredType)); + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot convert " + Type.getTypeName(getType()) + " to " + Type.getTypeName(requiredType)); }; } return result; diff --git a/exist-core/src/main/java/org/exist/xquery/value/DateTimeValue.java b/exist-core/src/main/java/org/exist/xquery/value/DateTimeValue.java index fadaac67da4..f1a5dc00cb7 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/DateTimeValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/DateTimeValue.java @@ -172,7 +172,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "Type error: cannot cast xs:dateTime to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/DateValue.java b/exist-core/src/main/java/org/exist/xquery/value/DateValue.java index 2701d6da74d..2f5e681aa7c 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/DateValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/DateValue.java @@ -122,7 +122,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { return new StringValue(getExpression(), dv.getStringValue()); } default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, "can not convert " + + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "can not convert " + Type.getTypeName(getType()) + "('" + getStringValue() + "') to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/DecimalValue.java b/exist-core/src/main/java/org/exist/xquery/value/DecimalValue.java index d69144666b9..fb8e9c3a652 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/DecimalValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/DecimalValue.java @@ -260,7 +260,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.BOOLEAN: return value.signum() == 0 ? BooleanValue.FALSE : BooleanValue.TRUE; default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot convert '" + Type.getTypeName(this.getType()) + " (" diff --git a/exist-core/src/main/java/org/exist/xquery/value/DoubleValue.java b/exist-core/src/main/java/org/exist/xquery/value/DoubleValue.java index 3cd6cd24094..76cf79945b5 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/DoubleValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/DoubleValue.java @@ -195,21 +195,21 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { public DecimalValue toDecimalValue() throws XPathException { if (isNaN() || isInfinite()) { - throw conversionError(Type.DECIMAL); + throw nanInfConversionError(Type.DECIMAL); } return new DecimalValue(getExpression(), BigDecimal.valueOf(value)); } public IntegerValue toIntegerValue() throws XPathException { if (isNaN() || isInfinite()) { - throw conversionError(Type.INTEGER); + throw nanInfConversionError(Type.INTEGER); } return new IntegerValue(getExpression(), (long) value); } public IntegerValue toIntegerSubType(final int subType) throws XPathException { if (isNaN() || isInfinite()) { - throw conversionError(subType); + throw nanInfConversionError(subType); } if (subType != Type.INTEGER && value > Integer.MAX_VALUE) { throw new XPathException(getExpression(), ErrorCodes.FOCA0003, "Value is out of range for type " @@ -219,7 +219,13 @@ public IntegerValue toIntegerSubType(final int subType) throws XPathException { } private XPathException conversionError(final int type) { - return new XPathException(getExpression(), ErrorCodes.FORG0001, "Cannot convert " + return new XPathException(getExpression(), ErrorCodes.XPTY0004, "Cannot convert " + + Type.getTypeName(getType()) + "('" + getStringValue() + "') to " + + Type.getTypeName(type)); + } + + private XPathException nanInfConversionError(final int type) { + return new XPathException(getExpression(), ErrorCodes.FOCA0002, "Cannot convert " + Type.getTypeName(getType()) + "('" + getStringValue() + "') to " + Type.getTypeName(type)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/DurationValue.java b/exist-core/src/main/java/org/exist/xquery/value/DurationValue.java index 192d8bf8537..0f14d72d50b 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/DurationValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/DurationValue.java @@ -214,7 +214,7 @@ protected BigDecimal secondsValue() { ).add(zeroIfNull((BigDecimal) duration.getField(DatatypeConstants.SECONDS))); } - protected BigDecimal secondsValueSigned() { + public BigDecimal secondsValueSigned() { BigDecimal x = secondsValue(); if (duration.getSign() < 0) { x = x.negate(); @@ -229,7 +229,7 @@ protected BigInteger monthsValue() { .add(zeroIfNull((BigInteger) duration.getField(DatatypeConstants.MONTHS))); } - protected BigInteger monthsValueSigned() { + public BigInteger monthsValueSigned() { BigInteger x = monthsValue(); if (duration.getSign() < 0) { x = x.negate(); @@ -312,8 +312,8 @@ public AtomicValue convertTo(int requiredType) throws XPathException { canonicalize(); return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, - "Type error: cannot cast ' + Type.getTypeName(getType()) 'to " + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, + "Type error: cannot cast " + Type.getTypeName(getType()) + " to " + Type.getTypeName(requiredType)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/FloatValue.java b/exist-core/src/main/java/org/exist/xquery/value/FloatValue.java index 6c67124e711..18de639b134 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/FloatValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/FloatValue.java @@ -230,7 +230,7 @@ public AtomicValue convertTo(int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, "cannot cast '" + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot cast '" + Type.getTypeName(this.getItemType()) + "(\"" + getStringValue() diff --git a/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterFunctionSequenceType.java b/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterFunctionSequenceType.java index 4b64118c61a..9726dad7cac 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterFunctionSequenceType.java +++ b/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterFunctionSequenceType.java @@ -79,6 +79,41 @@ public FunctionParameterFunctionSequenceType(final String attributeName, final i this.returnType = returnType; } + /** + * Legacy constructor accepting SequenceType[] for backward compatibility with old-style signatures. + * + * @param attributeName The name of the parameter in the FunctionSignature. + * @param primaryType The Type of the parameter. + * @param parameterTypes The Types of parameters the function needs to accept. + * @param returnType The Type the function(s) needs to return. + * @param cardinality The Cardinality of the parameter. + * @param description A description of the parameter in the FunctionSignature. + * @see org.exist.xquery.FunctionSignature @see Type @see org.exist.xquery.Cardinality + */ + public FunctionParameterFunctionSequenceType(final String attributeName, final int primaryType, final SequenceType[] parameterTypes, final SequenceType returnType, final Cardinality cardinality, final String description) { + super(attributeName, primaryType, cardinality, description); + this.parameters = parameterTypes; + this.arity = parameterTypes.length; + this.returnType = returnType; + } + + /** + * Legacy shorthand if return type is unspecified. + * + * @param attributeName The name of the parameter in the FunctionSignature. + * @param primaryType The Type of the parameter. + * @param parameterTypes The Types of parameters the function needs to accept. + * @param cardinality The Cardinality of the parameter. + * @param description A description of the parameter in the FunctionSignature. + * @see org.exist.xquery.FunctionSignature @see Type @see org.exist.xquery.Cardinality + */ + public FunctionParameterFunctionSequenceType(final String attributeName, final int primaryType, final SequenceType[] parameterTypes, final Cardinality cardinality, final String description) { + super(attributeName, primaryType, cardinality, description); + this.parameters = parameterTypes; + this.arity = parameterTypes.length; + this.returnType = new SequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE); + } + @Override public boolean checkType(final Sequence seq) throws XPathException { // all functions? diff --git a/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterSequenceType.java b/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterSequenceType.java index b383b881d75..a2aa7078d52 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterSequenceType.java +++ b/exist-core/src/main/java/org/exist/xquery/value/FunctionParameterSequenceType.java @@ -22,6 +22,7 @@ package org.exist.xquery.value; import org.exist.xquery.Cardinality; +import org.exist.xquery.Expression; /** * This class is used to specify the name and description of an XQuery function parameter. @@ -32,6 +33,7 @@ public class FunctionParameterSequenceType extends FunctionReturnSequenceType { private String attributeName; + private Expression defaultValue; /** * @param attributeName The name of the parameter in the FunctionSignature. @@ -79,4 +81,16 @@ public void setAttributeName(String attributeName) { this.attributeName = attributeName; } + public boolean hasDefaultValue() { + return defaultValue != null; + } + + public Expression getDefaultValue() { + return defaultValue; + } + + public void setDefaultValue(final Expression defaultValue) { + this.defaultValue = defaultValue; + } + } diff --git a/exist-core/src/main/java/org/exist/xquery/value/FunctionReference.java b/exist-core/src/main/java/org/exist/xquery/value/FunctionReference.java index bbac6e112b5..e691c0d2e3e 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/FunctionReference.java +++ b/exist-core/src/main/java/org/exist/xquery/value/FunctionReference.java @@ -177,7 +177,7 @@ public AtomicValue convertTo(int requiredType) throws XPathException { if (requiredType == Type.FUNCTION) { return this; } - throw new XPathException(getExpression(), ErrorCodes.FORG0001, "cannot convert function reference to " + Type.getTypeName(requiredType)); + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot convert function reference to " + Type.getTypeName(requiredType)); } public boolean effectiveBooleanValue() throws XPathException { diff --git a/exist-core/src/main/java/org/exist/xquery/value/GDayValue.java b/exist-core/src/main/java/org/exist/xquery/value/GDayValue.java index 373e0292b5c..6986ea4bf90 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/GDayValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/GDayValue.java @@ -88,8 +88,8 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, - "Type error: cannot cast xs:time to " + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, + "Type error: cannot cast xs:gDay to " + Type.getTypeName(requiredType)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/GMonthDayValue.java b/exist-core/src/main/java/org/exist/xquery/value/GMonthDayValue.java index b81fb399e94..822e10d6f85 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/GMonthDayValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/GMonthDayValue.java @@ -85,8 +85,8 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, - "Type error: cannot cast xs:time to " + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, + "Type error: cannot cast xs:gMonthDay to " + Type.getTypeName(requiredType)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/GMonthValue.java b/exist-core/src/main/java/org/exist/xquery/value/GMonthValue.java index 69e54cf525b..194ed1ce713 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/GMonthValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/GMonthValue.java @@ -122,7 +122,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "Type error: cannot cast xs:gMonth to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/GYearMonthValue.java b/exist-core/src/main/java/org/exist/xquery/value/GYearMonthValue.java index 722af983323..99fb7e79e94 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/GYearMonthValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/GYearMonthValue.java @@ -87,8 +87,8 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, - "Type error: cannot cast xs:time to " + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, + "Type error: cannot cast xs:gYearMonth to " + Type.getTypeName(requiredType)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/GYearValue.java b/exist-core/src/main/java/org/exist/xquery/value/GYearValue.java index b1f67a4122f..48e55d5d238 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/GYearValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/GYearValue.java @@ -86,8 +86,8 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, - "Type error: cannot cast xs:time to " + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, + "Type error: cannot cast xs:gYear to " + Type.getTypeName(requiredType)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/IntegerValue.java b/exist-core/src/main/java/org/exist/xquery/value/IntegerValue.java index 56da7dd8815..bee0bf0a36b 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/IntegerValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/IntegerValue.java @@ -307,7 +307,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.BOOLEAN: return (value.compareTo(ZERO_BIGINTEGER) == 0) ? BooleanValue.FALSE : BooleanValue.TRUE; default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot convert '" + Type.getTypeName(this.getType()) + " (" diff --git a/exist-core/src/main/java/org/exist/xquery/value/JavaObjectValue.java b/exist-core/src/main/java/org/exist/xquery/value/JavaObjectValue.java index 1e808bb170d..2c4c69e46f0 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/JavaObjectValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/JavaObjectValue.java @@ -65,7 +65,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { if (requiredType == Type.JAVA_OBJECT) { return this; } - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot convert Java object to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/NumericValue.java b/exist-core/src/main/java/org/exist/xquery/value/NumericValue.java index eeb940f6e42..87f71ab78b7 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/NumericValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/NumericValue.java @@ -153,10 +153,11 @@ public final int compareTo(final Collator collator, final AtomicValue other) thr if (Type.subTypeOfUnion(other.getType(), Type.NUMERIC)) { if (isNaN()) { - //NaN does not equal itself. - if (((NumericValue) other).isNaN()) { - return Constants.INFERIOR; - } + //NaN does not equal itself or any other value. + return Constants.INFERIOR; + } + if (((NumericValue) other).isNaN()) { + return Constants.SUPERIOR; } final IntSupplier comparison = createComparisonWith((NumericValue) other); diff --git a/exist-core/src/main/java/org/exist/xquery/value/QNameValue.java b/exist-core/src/main/java/org/exist/xquery/value/QNameValue.java index 05e94c3720d..84a3ec97ea4 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/QNameValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/QNameValue.java @@ -136,7 +136,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "A QName cannot be converted to " + Type.getTypeName(requiredType)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/value/SequenceComparator.java b/exist-core/src/main/java/org/exist/xquery/value/SequenceComparator.java index a64b1e65692..39eeb0e4ddb 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/SequenceComparator.java +++ b/exist-core/src/main/java/org/exist/xquery/value/SequenceComparator.java @@ -71,7 +71,7 @@ public int compare(final Sequence o1, final Sequence o2) { } final int o1Count = o1.getItemCount(); - final int o2Count = o1.getItemCount(); + final int o2Count = o2.getItemCount(); if (o1Count < o2Count) { return -1; diff --git a/exist-core/src/main/java/org/exist/xquery/value/SequenceType.java b/exist-core/src/main/java/org/exist/xquery/value/SequenceType.java index f00c9811ea1..4006ee55e42 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/SequenceType.java +++ b/exist-core/src/main/java/org/exist/xquery/value/SequenceType.java @@ -30,6 +30,9 @@ import org.w3c.dom.Element; import org.w3c.dom.Node; +import java.util.ArrayList; +import java.util.List; + /** * Represents an XQuery SequenceType and provides methods to check * sequences and items against this type. @@ -41,6 +44,8 @@ public class SequenceType { private int primaryType = Type.ITEM; private Cardinality cardinality = Cardinality.EXACTLY_ONE; private QName nodeName = null; + private List choiceAlternatives = null; + private String[] enumValues = null; public SequenceType() { } @@ -108,6 +113,81 @@ public void setNodeName(QName qname) { this.nodeName = qname; } + public void addChoiceAlternative(final SequenceType alt) { + if (choiceAlternatives == null) { + choiceAlternatives = new ArrayList<>(); + } + choiceAlternatives.add(alt); + } + + public List getChoiceAlternatives() { + return choiceAlternatives; + } + + public boolean isChoiceType() { + return choiceAlternatives != null && !choiceAlternatives.isEmpty(); + } + + public void setEnumValues(final String[] values) { + this.enumValues = values; + this.primaryType = Type.STRING; + } + + public String[] getEnumValues() { + return enumValues; + } + + public boolean isEnumType() { + return enumValues != null; + } + + // Record type support + + /** + * Represents a field in a record type declaration. + */ + public static class RecordField { + private final String name; + private final boolean optional; + private final SequenceType fieldType; + + public RecordField(final String name, final boolean optional, final SequenceType fieldType) { + this.name = name; + this.optional = optional; + this.fieldType = fieldType; + } + + public String getName() { return name; } + public boolean isOptional() { return optional; } + public SequenceType getFieldType() { return fieldType; } + } + + private List recordFields = null; + private boolean recordExtensible = false; + + public void addRecordField(final RecordField field) { + if (recordFields == null) { + recordFields = new ArrayList<>(); + } + recordFields.add(field); + } + + public List getRecordFields() { + return recordFields; + } + + public void setRecordExtensible(final boolean extensible) { + this.recordExtensible = extensible; + } + + public boolean isRecordExtensible() { + return recordExtensible; + } + + public boolean isRecordType() { + return primaryType == Type.RECORD; + } + /** * Check the specified sequence against this SequenceType. * @@ -116,16 +196,28 @@ public void setNodeName(QName qname) { * @return true, if all items of the sequence have the same type as or a subtype of primaryType */ public boolean checkType(final Sequence seq) throws XPathException { - if (nodeName == null) { - return Type.subTypeOf(seq.getItemType(), primaryType); + if (isChoiceType()) { + Item next; + for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { + next = i.nextItem(); + if (!checkType(next)) { + return false; + } + } + return true; } - - for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { - if (!checkType(i.nextItem())) { - return false; + if (nodeName != null) { + Item next; + for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { + next = i.nextItem(); + if (!checkType(next)) { + return false; + } } + return true; + } else { + return Type.subTypeOf(seq.getItemType(), primaryType); } - return true; } /** @@ -135,54 +227,171 @@ public boolean checkType(final Sequence seq) throws XPathException { * @return true, if item is a subtype of primaryType */ public boolean checkType(final Item item) { + if (isChoiceType()) { + for (final SequenceType alt : choiceAlternatives) { + if (alt.checkType(item)) { + return true; + } + } + return false; + } + if (isEnumType()) { + if (!Type.subTypeOf(item.getType(), Type.STRING)) { + return false; + } + try { + final String val = item.getStringValue(); + for (final String enumVal : enumValues) { + if (enumVal.equals(val)) { + return true; + } + } + } catch (final XPathException e) { + // cannot get string value + } + return false; + } + if (isRecordType()) { + return checkRecordType(item); + } + Node realNode = null; int type = item.getType(); if (type == Type.NODE) { - final Node realNode = ((NodeValue) item).getNode(); + realNode = ((NodeValue) item).getNode(); type = realNode.getNodeType(); } if (!Type.subTypeOf(type, primaryType)) { return false; } - if (nodeName == null) { - return true; + if (nodeName != null) { + + //TODO : how to improve performance ? + + final NodeValue nvItem = (NodeValue) item; + QName realName = null; + if (item.getType() == Type.DOCUMENT) { + // it's a document... we need to get the document element's name + final Document doc; + if (nvItem instanceof Document) { + doc = (Document) nvItem; + } else { + doc = nvItem.getOwnerDocument(); + } + if (doc != null) { + final Element elem = doc.getDocumentElement(); + if (elem != null) { + realName = new QName(elem.getLocalName(), elem.getNamespaceURI()); + } + } + } else { + // get the name of the element/attribute + realName = nvItem.getQName(); + } + + if (realName == null) { + return false; + } + + if (nodeName.getNamespaceURI() != null) { + if (!nodeName.getNamespaceURI().equals(realName.getNamespaceURI())) { + return false; + } + } + if (nodeName.getLocalPart() != null) { + return nodeName.getLocalPart().equals(realName.getLocalPart()); + } } - //TODO : how to improve performance ? - final QName realName = getRealName(item); + return true; + } - if (realName == null) { + /** + * Check if an item matches this record type declaration. + * A map matches a record type if: + * - All required fields are present + * - Each field value matches the declared type + * - If not extensible (no *), no extra keys are present + */ + private boolean checkRecordType(final Item item) { + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { return false; } - if (nodeName.getNamespaceURI() != null && - !nodeName.getNamespaceURI().equals(realName.getNamespaceURI())) { - return false; + // record(*) matches any map + if (recordExtensible && (recordFields == null || recordFields.isEmpty())) { + return true; } - if (nodeName.getLocalPart() != null) { - return nodeName.getLocalPart().equals(realName.getLocalPart()); + final org.exist.xquery.functions.map.AbstractMapType map = + (org.exist.xquery.functions.map.AbstractMapType) item; + + // record() with no fields and not extensible: only empty maps match + if ((recordFields == null || recordFields.isEmpty()) && !recordExtensible) { + return map.size() == 0; } + + // Check required fields are present and types match + for (final RecordField field : recordFields) { + final AtomicValue key = new StringValue(null, field.getName()); + final boolean hasKey = map.contains(key); + + if (!hasKey && !field.isOptional()) { + return false; // required field missing + } + + if (hasKey && field.getFieldType() != null) { + try { + final Sequence value = map.get(key); + if (!field.getFieldType().matchesCardinality(value)) { + return false; + } + if (!value.isEmpty() && !field.getFieldType().checkType(value)) { + return false; + } + } catch (final XPathException e) { + return false; + } + } + } + + // If not extensible, check for extra keys + if (!recordExtensible) { + try { + final Sequence keys = map.keys(); + for (final SequenceIterator it = keys.iterate(); it.hasNext(); ) { + final String keyName = it.nextItem().getStringValue(); + boolean declared = false; + for (final RecordField field : recordFields) { + if (field.getName().equals(keyName)) { + declared = true; + break; + } + } + if (!declared) { + return false; // undeclared key in non-extensible record + } + } + } catch (final XPathException e) { + return false; + } + } + return true; } - private static QName getRealName(final Item item) { - final NodeValue nvItem = (NodeValue) item; - if (item.getType() != Type.DOCUMENT) { - // get the name of the element/attribute - return nvItem.getQName(); - } - // it's a document... we need to get the document element's name - final Document doc; - if (nvItem instanceof Document) { - doc = (Document) nvItem; - } else { - doc = nvItem.getOwnerDocument(); + /** + * Check if a sequence's cardinality matches this type's cardinality declaration. + */ + public boolean matchesCardinality(final Sequence seq) { + if (cardinality == Cardinality.ZERO_OR_MORE) { + return true; } - if (doc == null) { - return null; + final int count = seq.getItemCount(); + if (count == 0) { + return cardinality.isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE); } - final Element elem = doc.getDocumentElement(); - if (elem == null) { - return null; + if (count == 1) { + return true; // EXACTLY_ONE, ZERO_OR_ONE, ONE_OR_MORE all accept 1 } - return new QName(elem.getLocalName(), elem.getNamespaceURI()); + // count > 1 + return cardinality == Cardinality.ONE_OR_MORE || cardinality == Cardinality.ZERO_OR_MORE; } /** @@ -197,14 +406,17 @@ public void checkType(int type) throws XPathException { return; } - // Although xs:anyURI is not a subtype of xs:string, both types are compatible + //Although xs:anyURI is not a subtype of xs:string, both types are compatible if (type == Type.ANY_URI && primaryType == Type.STRING) { return; } if (!Type.subTypeOf(type, primaryType)) { throw new XPathException((Expression) null, ErrorCodes.XPTY0004, - "Type error: expected type: " + Type.getTypeName(primaryType) + "; got: " + Type.getTypeName(type)); + "Type error: expected type: " + + Type.getTypeName(primaryType) + + "; got: " + + Type.getTypeName(type)); } } @@ -226,17 +438,38 @@ public void checkCardinality(Sequence seq) throws XPathException { } } - /** - * Used to serialize SequenceTypes, when building stack traces, for example. - * - * @return The serialized SequenceType - */ @Override public String toString() { if (cardinality == Cardinality.EMPTY_SEQUENCE) { return cardinality.toXQueryCardinalityString(); } + if (isChoiceType()) { + final StringBuilder sb = new StringBuilder("("); + for (int i = 0; i < choiceAlternatives.size(); i++) { + if (i > 0) { + sb.append(" | "); + } + sb.append(choiceAlternatives.get(i).toString()); + } + sb.append(")"); + sb.append(cardinality.toXQueryCardinalityString()); + return sb.toString(); + } + + if (isEnumType()) { + final StringBuilder sb = new StringBuilder("enum("); + for (int i = 0; i < enumValues.length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append("\"").append(enumValues[i]).append("\""); + } + sb.append(")"); + sb.append(cardinality.toXQueryCardinalityString()); + return sb.toString(); + } + final String str; if (primaryType == Type.DOCUMENT && nodeName != null) { str = "document-node(" + nodeName.getStringValue() + ")"; diff --git a/exist-core/src/main/java/org/exist/xquery/value/StringValue.java b/exist-core/src/main/java/org/exist/xquery/value/StringValue.java index 9b2fccf0c83..bc8748b504f 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/StringValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/StringValue.java @@ -379,15 +379,14 @@ private void checkType() throws XPathException { case Type.LANGUAGE: final Matcher matcher = langPattern.matcher(value); if (!matcher.matches()) { - throw new XPathException(getExpression(), - "Type error: string " - + value - + " is not valid for type xs:language"); + throw new XPathException(getExpression(), ErrorCodes.FORG0001, + "String '" + value + "' is not valid for type xs:language"); } return; case Type.NAME: if (QName.isQName(value) != VALID.val) { - throw new XPathException(getExpression(), "Type error: string " + value + " is not a valid xs:Name"); + throw new XPathException(getExpression(), ErrorCodes.FORG0001, + "String '" + value + "' is not a valid xs:Name"); } return; case Type.NCNAME: @@ -395,12 +394,14 @@ private void checkType() throws XPathException { case Type.IDREF: case Type.ENTITY: if (!XMLNames.isNCName(value)) { - throw new XPathException(getExpression(), "Type error: string " + value + " is not a valid " + Type.getTypeName(type)); + throw new XPathException(getExpression(), ErrorCodes.FORG0001, + "String '" + value + "' is not a valid " + Type.getTypeName(type)); } return; case Type.NMTOKEN: if (!XMLNames.isNmToken(value)) { - throw new XPathException(getExpression(), "Type error: string " + value + " is not a valid xs:NMTOKEN"); + throw new XPathException(getExpression(), ErrorCodes.FORG0001, + "String '" + value + "' is not a valid xs:NMTOKEN"); } } } @@ -489,7 +490,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.G_YEAR_MONTH -> new GYearMonthValue(getExpression(), value); case Type.G_MONTH_DAY -> new GMonthDayValue(getExpression(), value); case Type.UNTYPED_ATOMIC -> new UntypedAtomicValue(getExpression(), getStringValue()); - default -> throw new XPathException(getExpression(), ErrorCodes.FORG0001, "cannot cast '" + + default -> throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "cannot cast '" + Type.getTypeName(this.getItemType()) + "(\"" + getStringValue() + "\")' to " + Type.getTypeName(requiredType)); }; diff --git a/exist-core/src/main/java/org/exist/xquery/value/TimeValue.java b/exist-core/src/main/java/org/exist/xquery/value/TimeValue.java index ae15414c308..99ca5721282 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/TimeValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/TimeValue.java @@ -108,7 +108,7 @@ public AtomicValue convertTo(final int requiredType) throws XPathException { case Type.UNTYPED_ATOMIC: return new UntypedAtomicValue(getExpression(), getStringValue()); default: - throw new XPathException(getExpression(), ErrorCodes.FORG0001, + throw new XPathException(getExpression(), ErrorCodes.XPTY0004, "Type error: cannot cast xs:time to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/value/Type.java b/exist-core/src/main/java/org/exist/xquery/value/Type.java index f60c60d7255..0c65c7a031a 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/Type.java +++ b/exist-core/src/main/java/org/exist/xquery/value/Type.java @@ -133,9 +133,12 @@ public class Type { public final static int JAVA_OBJECT = 68; public final static int EMPTY_SEQUENCE = 69; // NOTE(AR) this types does appear in the XQ 3.1 spec - https://www.w3.org/TR/xquery-31/#id-sequencetype-syntax - private final static int[] superTypes = new int[69]; - private final static Int2ObjectOpenHashMap typeNames = new Int2ObjectOpenHashMap<>(69, Hash.FAST_LOAD_FACTOR); - private final static Object2IntOpenHashMap typeCodes = new Object2IntOpenHashMap<>(78, Hash.FAST_LOAD_FACTOR); + /* XQuery 4.0 types */ + public final static int RECORD = 70; + + private final static int[] superTypes = new int[71]; + private final static Int2ObjectOpenHashMap typeNames = new Int2ObjectOpenHashMap<>(71, Hash.FAST_LOAD_FACTOR); + private final static Object2IntOpenHashMap typeCodes = new Object2IntOpenHashMap<>(80, Hash.FAST_LOAD_FACTOR); static { typeCodes.defaultReturnValue(NO_SUCH_VALUE); } @@ -249,6 +252,9 @@ public class Type { defineSubType(FUNCTION, MAP_ITEM); defineSubType(FUNCTION, ARRAY_ITEM); + // XQ4: RECORD is a subtype of MAP + defineSubType(MAP_ITEM, RECORD); + // NODE types defineSubType(NODE, ATTRIBUTE); defineSubType(NODE, CDATA_SECTION); @@ -327,6 +333,7 @@ public class Type { defineBuiltInType(FUNCTION, "function(*)", "function"); defineBuiltInType(ARRAY_ITEM, "array(*)", "array"); defineBuiltInType(MAP_ITEM, "map(*)", "map"); // keep `map` for backward compatibility + defineBuiltInType(RECORD, "record(*)", "record"); defineBuiltInType(CDATA_SECTION, "cdata-section()"); defineBuiltInType(JAVA_OBJECT, "object"); defineBuiltInType(EMPTY_SEQUENCE, "empty-sequence()", "empty()"); // keep `empty()` for backward compatibility diff --git a/exist-core/src/main/java/org/exist/xquery/value/UntypedAtomicValue.java b/exist-core/src/main/java/org/exist/xquery/value/UntypedAtomicValue.java index 60d1ab47bbd..323ca2dce8d 100644 --- a/exist-core/src/main/java/org/exist/xquery/value/UntypedAtomicValue.java +++ b/exist-core/src/main/java/org/exist/xquery/value/UntypedAtomicValue.java @@ -154,7 +154,7 @@ TODO replace UntypedAtomicValue with something that can allow lazily reading tex final DayTimeDurationValue dtdv = new DayTimeDurationValue(expression, value); return new DayTimeDurationValue(expression, dtdv.getCanonicalDuration()); default: - throw new XPathException(expression, ErrorCodes.FORG0001, "cannot cast '" + + throw new XPathException(expression, ErrorCodes.XPTY0004, "cannot cast '" + Type.getTypeName(Type.ANY_ATOMIC_TYPE) + "(\"" + value + "\")' to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/resources/org/exist/xquery/functions/fn/html5-entities.properties b/exist-core/src/main/resources/org/exist/xquery/functions/fn/html5-entities.properties new file mode 100644 index 00000000000..6c8b0f49aed --- /dev/null +++ b/exist-core/src/main/resources/org/exist/xquery/functions/fn/html5-entities.properties @@ -0,0 +1,2255 @@ +# +# eXist-db Open Source Native XML Database +# Copyright (C) 2001 The eXist-db Authors +# +# info@exist-db.org +# http://www.exist-db.org +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# + +# HTML5 Named Character References +# Generated from https://html.spec.whatwg.org/entities.json +AElig=U+00C6 +AElig=U+00C6 +AMP=U+0026 +AMP=U+0026 +Aacute=U+00C1 +Aacute=U+00C1 +Abreve=U+0102 +Acirc=U+00C2 +Acirc=U+00C2 +Acy=U+0410 +Afr=U+1D504 +Agrave=U+00C0 +Agrave=U+00C0 +Alpha=U+0391 +Amacr=U+0100 +And=U+2A53 +Aogon=U+0104 +Aopf=U+1D538 +ApplyFunction=U+2061 +Aring=U+00C5 +Aring=U+00C5 +Ascr=U+1D49C +Assign=U+2254 +Atilde=U+00C3 +Atilde=U+00C3 +Auml=U+00C4 +Auml=U+00C4 +Backslash=U+2216 +Barv=U+2AE7 +Barwed=U+2306 +Bcy=U+0411 +Because=U+2235 +Bernoullis=U+212C +Beta=U+0392 +Bfr=U+1D505 +Bopf=U+1D539 +Breve=U+02D8 +Bscr=U+212C +Bumpeq=U+224E +CHcy=U+0427 +COPY=U+00A9 +COPY=U+00A9 +Cacute=U+0106 +Cap=U+22D2 +CapitalDifferentialD=U+2145 +Cayleys=U+212D +Ccaron=U+010C +Ccedil=U+00C7 +Ccedil=U+00C7 +Ccirc=U+0108 +Cconint=U+2230 +Cdot=U+010A +Cedilla=U+00B8 +CenterDot=U+00B7 +Cfr=U+212D +Chi=U+03A7 +CircleDot=U+2299 +CircleMinus=U+2296 +CirclePlus=U+2295 +CircleTimes=U+2297 +ClockwiseContourIntegral=U+2232 +CloseCurlyDoubleQuote=U+201D +CloseCurlyQuote=U+2019 +Colon=U+2237 +Colone=U+2A74 +Congruent=U+2261 +Conint=U+222F +ContourIntegral=U+222E +Copf=U+2102 +Coproduct=U+2210 +CounterClockwiseContourIntegral=U+2233 +Cross=U+2A2F +Cscr=U+1D49E +Cup=U+22D3 +CupCap=U+224D +DD=U+2145 +DDotrahd=U+2911 +DJcy=U+0402 +DScy=U+0405 +DZcy=U+040F +Dagger=U+2021 +Darr=U+21A1 +Dashv=U+2AE4 +Dcaron=U+010E +Dcy=U+0414 +Del=U+2207 +Delta=U+0394 +Dfr=U+1D507 +DiacriticalAcute=U+00B4 +DiacriticalDot=U+02D9 +DiacriticalDoubleAcute=U+02DD +DiacriticalGrave=U+0060 +DiacriticalTilde=U+02DC +Diamond=U+22C4 +DifferentialD=U+2146 +Dopf=U+1D53B +Dot=U+00A8 +DotDot=U+20DC +DotEqual=U+2250 +DoubleContourIntegral=U+222F +DoubleDot=U+00A8 +DoubleDownArrow=U+21D3 +DoubleLeftArrow=U+21D0 +DoubleLeftRightArrow=U+21D4 +DoubleLeftTee=U+2AE4 +DoubleLongLeftArrow=U+27F8 +DoubleLongLeftRightArrow=U+27FA +DoubleLongRightArrow=U+27F9 +DoubleRightArrow=U+21D2 +DoubleRightTee=U+22A8 +DoubleUpArrow=U+21D1 +DoubleUpDownArrow=U+21D5 +DoubleVerticalBar=U+2225 +DownArrow=U+2193 +DownArrowBar=U+2913 +DownArrowUpArrow=U+21F5 +DownBreve=U+0311 +DownLeftRightVector=U+2950 +DownLeftTeeVector=U+295E +DownLeftVector=U+21BD +DownLeftVectorBar=U+2956 +DownRightTeeVector=U+295F +DownRightVector=U+21C1 +DownRightVectorBar=U+2957 +DownTee=U+22A4 +DownTeeArrow=U+21A7 +Downarrow=U+21D3 +Dscr=U+1D49F +Dstrok=U+0110 +ENG=U+014A +ETH=U+00D0 +ETH=U+00D0 +Eacute=U+00C9 +Eacute=U+00C9 +Ecaron=U+011A +Ecirc=U+00CA +Ecirc=U+00CA +Ecy=U+042D +Edot=U+0116 +Efr=U+1D508 +Egrave=U+00C8 +Egrave=U+00C8 +Element=U+2208 +Emacr=U+0112 +EmptySmallSquare=U+25FB +EmptyVerySmallSquare=U+25AB +Eogon=U+0118 +Eopf=U+1D53C +Epsilon=U+0395 +Equal=U+2A75 +EqualTilde=U+2242 +Equilibrium=U+21CC +Escr=U+2130 +Esim=U+2A73 +Eta=U+0397 +Euml=U+00CB +Euml=U+00CB +Exists=U+2203 +ExponentialE=U+2147 +Fcy=U+0424 +Ffr=U+1D509 +FilledSmallSquare=U+25FC +FilledVerySmallSquare=U+25AA +Fopf=U+1D53D +ForAll=U+2200 +Fouriertrf=U+2131 +Fscr=U+2131 +GJcy=U+0403 +GT=U+003E +GT=U+003E +Gamma=U+0393 +Gammad=U+03DC +Gbreve=U+011E +Gcedil=U+0122 +Gcirc=U+011C +Gcy=U+0413 +Gdot=U+0120 +Gfr=U+1D50A +Gg=U+22D9 +Gopf=U+1D53E +GreaterEqual=U+2265 +GreaterEqualLess=U+22DB +GreaterFullEqual=U+2267 +GreaterGreater=U+2AA2 +GreaterLess=U+2277 +GreaterSlantEqual=U+2A7E +GreaterTilde=U+2273 +Gscr=U+1D4A2 +Gt=U+226B +HARDcy=U+042A +Hacek=U+02C7 +Hat=U+005E +Hcirc=U+0124 +Hfr=U+210C +HilbertSpace=U+210B +Hopf=U+210D +HorizontalLine=U+2500 +Hscr=U+210B +Hstrok=U+0126 +HumpDownHump=U+224E +HumpEqual=U+224F +IEcy=U+0415 +IJlig=U+0132 +IOcy=U+0401 +Iacute=U+00CD +Iacute=U+00CD +Icirc=U+00CE +Icirc=U+00CE +Icy=U+0418 +Idot=U+0130 +Ifr=U+2111 +Igrave=U+00CC +Igrave=U+00CC +Im=U+2111 +Imacr=U+012A +ImaginaryI=U+2148 +Implies=U+21D2 +Int=U+222C +Integral=U+222B +Intersection=U+22C2 +InvisibleComma=U+2063 +InvisibleTimes=U+2062 +Iogon=U+012E +Iopf=U+1D540 +Iota=U+0399 +Iscr=U+2110 +Itilde=U+0128 +Iukcy=U+0406 +Iuml=U+00CF +Iuml=U+00CF +Jcirc=U+0134 +Jcy=U+0419 +Jfr=U+1D50D +Jopf=U+1D541 +Jscr=U+1D4A5 +Jsercy=U+0408 +Jukcy=U+0404 +KHcy=U+0425 +KJcy=U+040C +Kappa=U+039A +Kcedil=U+0136 +Kcy=U+041A +Kfr=U+1D50E +Kopf=U+1D542 +Kscr=U+1D4A6 +LJcy=U+0409 +LT=U+003C +LT=U+003C +Lacute=U+0139 +Lambda=U+039B +Lang=U+27EA +Laplacetrf=U+2112 +Larr=U+219E +Lcaron=U+013D +Lcedil=U+013B +Lcy=U+041B +LeftAngleBracket=U+27E8 +LeftArrow=U+2190 +LeftArrowBar=U+21E4 +LeftArrowRightArrow=U+21C6 +LeftCeiling=U+2308 +LeftDoubleBracket=U+27E6 +LeftDownTeeVector=U+2961 +LeftDownVector=U+21C3 +LeftDownVectorBar=U+2959 +LeftFloor=U+230A +LeftRightArrow=U+2194 +LeftRightVector=U+294E +LeftTee=U+22A3 +LeftTeeArrow=U+21A4 +LeftTeeVector=U+295A +LeftTriangle=U+22B2 +LeftTriangleBar=U+29CF +LeftTriangleEqual=U+22B4 +LeftUpDownVector=U+2951 +LeftUpTeeVector=U+2960 +LeftUpVector=U+21BF +LeftUpVectorBar=U+2958 +LeftVector=U+21BC +LeftVectorBar=U+2952 +Leftarrow=U+21D0 +Leftrightarrow=U+21D4 +LessEqualGreater=U+22DA +LessFullEqual=U+2266 +LessGreater=U+2276 +LessLess=U+2AA1 +LessSlantEqual=U+2A7D +LessTilde=U+2272 +Lfr=U+1D50F +Ll=U+22D8 +Lleftarrow=U+21DA +Lmidot=U+013F +LongLeftArrow=U+27F5 +LongLeftRightArrow=U+27F7 +LongRightArrow=U+27F6 +Longleftarrow=U+27F8 +Longleftrightarrow=U+27FA +Longrightarrow=U+27F9 +Lopf=U+1D543 +LowerLeftArrow=U+2199 +LowerRightArrow=U+2198 +Lscr=U+2112 +Lsh=U+21B0 +Lstrok=U+0141 +Lt=U+226A +Map=U+2905 +Mcy=U+041C +MediumSpace=U+205F +Mellintrf=U+2133 +Mfr=U+1D510 +MinusPlus=U+2213 +Mopf=U+1D544 +Mscr=U+2133 +Mu=U+039C +NJcy=U+040A +Nacute=U+0143 +Ncaron=U+0147 +Ncedil=U+0145 +Ncy=U+041D +NegativeMediumSpace=U+200B +NegativeThickSpace=U+200B +NegativeThinSpace=U+200B +NegativeVeryThinSpace=U+200B +NestedGreaterGreater=U+226B +NestedLessLess=U+226A +NewLine=U+000A +Nfr=U+1D511 +NoBreak=U+2060 +NonBreakingSpace=U+00A0 +Nopf=U+2115 +Not=U+2AEC +NotCongruent=U+2262 +NotCupCap=U+226D +NotDoubleVerticalBar=U+2226 +NotElement=U+2209 +NotEqual=U+2260 +NotEqualTilde=U+2242,U+0338 +NotExists=U+2204 +NotGreater=U+226F +NotGreaterEqual=U+2271 +NotGreaterFullEqual=U+2267,U+0338 +NotGreaterGreater=U+226B,U+0338 +NotGreaterLess=U+2279 +NotGreaterSlantEqual=U+2A7E,U+0338 +NotGreaterTilde=U+2275 +NotHumpDownHump=U+224E,U+0338 +NotHumpEqual=U+224F,U+0338 +NotLeftTriangle=U+22EA +NotLeftTriangleBar=U+29CF,U+0338 +NotLeftTriangleEqual=U+22EC +NotLess=U+226E +NotLessEqual=U+2270 +NotLessGreater=U+2278 +NotLessLess=U+226A,U+0338 +NotLessSlantEqual=U+2A7D,U+0338 +NotLessTilde=U+2274 +NotNestedGreaterGreater=U+2AA2,U+0338 +NotNestedLessLess=U+2AA1,U+0338 +NotPrecedes=U+2280 +NotPrecedesEqual=U+2AAF,U+0338 +NotPrecedesSlantEqual=U+22E0 +NotReverseElement=U+220C +NotRightTriangle=U+22EB +NotRightTriangleBar=U+29D0,U+0338 +NotRightTriangleEqual=U+22ED +NotSquareSubset=U+228F,U+0338 +NotSquareSubsetEqual=U+22E2 +NotSquareSuperset=U+2290,U+0338 +NotSquareSupersetEqual=U+22E3 +NotSubset=U+2282,U+20D2 +NotSubsetEqual=U+2288 +NotSucceeds=U+2281 +NotSucceedsEqual=U+2AB0,U+0338 +NotSucceedsSlantEqual=U+22E1 +NotSucceedsTilde=U+227F,U+0338 +NotSuperset=U+2283,U+20D2 +NotSupersetEqual=U+2289 +NotTilde=U+2241 +NotTildeEqual=U+2244 +NotTildeFullEqual=U+2247 +NotTildeTilde=U+2249 +NotVerticalBar=U+2224 +Nscr=U+1D4A9 +Ntilde=U+00D1 +Ntilde=U+00D1 +Nu=U+039D +OElig=U+0152 +Oacute=U+00D3 +Oacute=U+00D3 +Ocirc=U+00D4 +Ocirc=U+00D4 +Ocy=U+041E +Odblac=U+0150 +Ofr=U+1D512 +Ograve=U+00D2 +Ograve=U+00D2 +Omacr=U+014C +Omega=U+03A9 +Omicron=U+039F +Oopf=U+1D546 +OpenCurlyDoubleQuote=U+201C +OpenCurlyQuote=U+2018 +Or=U+2A54 +Oscr=U+1D4AA +Oslash=U+00D8 +Oslash=U+00D8 +Otilde=U+00D5 +Otilde=U+00D5 +Otimes=U+2A37 +Ouml=U+00D6 +Ouml=U+00D6 +OverBar=U+203E +OverBrace=U+23DE +OverBracket=U+23B4 +OverParenthesis=U+23DC +PartialD=U+2202 +Pcy=U+041F +Pfr=U+1D513 +Phi=U+03A6 +Pi=U+03A0 +PlusMinus=U+00B1 +Poincareplane=U+210C +Popf=U+2119 +Pr=U+2ABB +Precedes=U+227A +PrecedesEqual=U+2AAF +PrecedesSlantEqual=U+227C +PrecedesTilde=U+227E +Prime=U+2033 +Product=U+220F +Proportion=U+2237 +Proportional=U+221D +Pscr=U+1D4AB +Psi=U+03A8 +QUOT=U+0022 +QUOT=U+0022 +Qfr=U+1D514 +Qopf=U+211A +Qscr=U+1D4AC +RBarr=U+2910 +REG=U+00AE +REG=U+00AE +Racute=U+0154 +Rang=U+27EB +Rarr=U+21A0 +Rarrtl=U+2916 +Rcaron=U+0158 +Rcedil=U+0156 +Rcy=U+0420 +Re=U+211C +ReverseElement=U+220B +ReverseEquilibrium=U+21CB +ReverseUpEquilibrium=U+296F +Rfr=U+211C +Rho=U+03A1 +RightAngleBracket=U+27E9 +RightArrow=U+2192 +RightArrowBar=U+21E5 +RightArrowLeftArrow=U+21C4 +RightCeiling=U+2309 +RightDoubleBracket=U+27E7 +RightDownTeeVector=U+295D +RightDownVector=U+21C2 +RightDownVectorBar=U+2955 +RightFloor=U+230B +RightTee=U+22A2 +RightTeeArrow=U+21A6 +RightTeeVector=U+295B +RightTriangle=U+22B3 +RightTriangleBar=U+29D0 +RightTriangleEqual=U+22B5 +RightUpDownVector=U+294F +RightUpTeeVector=U+295C +RightUpVector=U+21BE +RightUpVectorBar=U+2954 +RightVector=U+21C0 +RightVectorBar=U+2953 +Rightarrow=U+21D2 +Ropf=U+211D +RoundImplies=U+2970 +Rrightarrow=U+21DB +Rscr=U+211B +Rsh=U+21B1 +RuleDelayed=U+29F4 +SHCHcy=U+0429 +SHcy=U+0428 +SOFTcy=U+042C +Sacute=U+015A +Sc=U+2ABC +Scaron=U+0160 +Scedil=U+015E +Scirc=U+015C +Scy=U+0421 +Sfr=U+1D516 +ShortDownArrow=U+2193 +ShortLeftArrow=U+2190 +ShortRightArrow=U+2192 +ShortUpArrow=U+2191 +Sigma=U+03A3 +SmallCircle=U+2218 +Sopf=U+1D54A +Sqrt=U+221A +Square=U+25A1 +SquareIntersection=U+2293 +SquareSubset=U+228F +SquareSubsetEqual=U+2291 +SquareSuperset=U+2290 +SquareSupersetEqual=U+2292 +SquareUnion=U+2294 +Sscr=U+1D4AE +Star=U+22C6 +Sub=U+22D0 +Subset=U+22D0 +SubsetEqual=U+2286 +Succeeds=U+227B +SucceedsEqual=U+2AB0 +SucceedsSlantEqual=U+227D +SucceedsTilde=U+227F +SuchThat=U+220B +Sum=U+2211 +Sup=U+22D1 +Superset=U+2283 +SupersetEqual=U+2287 +Supset=U+22D1 +THORN=U+00DE +THORN=U+00DE +TRADE=U+2122 +TSHcy=U+040B +TScy=U+0426 +Tab=U+0009 +Tau=U+03A4 +Tcaron=U+0164 +Tcedil=U+0162 +Tcy=U+0422 +Tfr=U+1D517 +Therefore=U+2234 +Theta=U+0398 +ThickSpace=U+205F,U+200A +ThinSpace=U+2009 +Tilde=U+223C +TildeEqual=U+2243 +TildeFullEqual=U+2245 +TildeTilde=U+2248 +Topf=U+1D54B +TripleDot=U+20DB +Tscr=U+1D4AF +Tstrok=U+0166 +Uacute=U+00DA +Uacute=U+00DA +Uarr=U+219F +Uarrocir=U+2949 +Ubrcy=U+040E +Ubreve=U+016C +Ucirc=U+00DB +Ucirc=U+00DB +Ucy=U+0423 +Udblac=U+0170 +Ufr=U+1D518 +Ugrave=U+00D9 +Ugrave=U+00D9 +Umacr=U+016A +UnderBar=U+005F +UnderBrace=U+23DF +UnderBracket=U+23B5 +UnderParenthesis=U+23DD +Union=U+22C3 +UnionPlus=U+228E +Uogon=U+0172 +Uopf=U+1D54C +UpArrow=U+2191 +UpArrowBar=U+2912 +UpArrowDownArrow=U+21C5 +UpDownArrow=U+2195 +UpEquilibrium=U+296E +UpTee=U+22A5 +UpTeeArrow=U+21A5 +Uparrow=U+21D1 +Updownarrow=U+21D5 +UpperLeftArrow=U+2196 +UpperRightArrow=U+2197 +Upsi=U+03D2 +Upsilon=U+03A5 +Uring=U+016E +Uscr=U+1D4B0 +Utilde=U+0168 +Uuml=U+00DC +Uuml=U+00DC +VDash=U+22AB +Vbar=U+2AEB +Vcy=U+0412 +Vdash=U+22A9 +Vdashl=U+2AE6 +Vee=U+22C1 +Verbar=U+2016 +Vert=U+2016 +VerticalBar=U+2223 +VerticalLine=U+007C +VerticalSeparator=U+2758 +VerticalTilde=U+2240 +VeryThinSpace=U+200A +Vfr=U+1D519 +Vopf=U+1D54D +Vscr=U+1D4B1 +Vvdash=U+22AA +Wcirc=U+0174 +Wedge=U+22C0 +Wfr=U+1D51A +Wopf=U+1D54E +Wscr=U+1D4B2 +Xfr=U+1D51B +Xi=U+039E +Xopf=U+1D54F +Xscr=U+1D4B3 +YAcy=U+042F +YIcy=U+0407 +YUcy=U+042E +Yacute=U+00DD +Yacute=U+00DD +Ycirc=U+0176 +Ycy=U+042B +Yfr=U+1D51C +Yopf=U+1D550 +Yscr=U+1D4B4 +Yuml=U+0178 +ZHcy=U+0416 +Zacute=U+0179 +Zcaron=U+017D +Zcy=U+0417 +Zdot=U+017B +ZeroWidthSpace=U+200B +Zeta=U+0396 +Zfr=U+2128 +Zopf=U+2124 +Zscr=U+1D4B5 +aacute=U+00E1 +aacute=U+00E1 +abreve=U+0103 +ac=U+223E +acE=U+223E,U+0333 +acd=U+223F +acirc=U+00E2 +acirc=U+00E2 +acute=U+00B4 +acute=U+00B4 +acy=U+0430 +aelig=U+00E6 +aelig=U+00E6 +af=U+2061 +afr=U+1D51E +agrave=U+00E0 +agrave=U+00E0 +alefsym=U+2135 +aleph=U+2135 +alpha=U+03B1 +amacr=U+0101 +amalg=U+2A3F +amp=U+0026 +amp=U+0026 +and=U+2227 +andand=U+2A55 +andd=U+2A5C +andslope=U+2A58 +andv=U+2A5A +ang=U+2220 +ange=U+29A4 +angle=U+2220 +angmsd=U+2221 +angmsdaa=U+29A8 +angmsdab=U+29A9 +angmsdac=U+29AA +angmsdad=U+29AB +angmsdae=U+29AC +angmsdaf=U+29AD +angmsdag=U+29AE +angmsdah=U+29AF +angrt=U+221F +angrtvb=U+22BE +angrtvbd=U+299D +angsph=U+2222 +angst=U+00C5 +angzarr=U+237C +aogon=U+0105 +aopf=U+1D552 +ap=U+2248 +apE=U+2A70 +apacir=U+2A6F +ape=U+224A +apid=U+224B +apos=U+0027 +approx=U+2248 +approxeq=U+224A +aring=U+00E5 +aring=U+00E5 +ascr=U+1D4B6 +ast=U+002A +asymp=U+2248 +asympeq=U+224D +atilde=U+00E3 +atilde=U+00E3 +auml=U+00E4 +auml=U+00E4 +awconint=U+2233 +awint=U+2A11 +bNot=U+2AED +backcong=U+224C +backepsilon=U+03F6 +backprime=U+2035 +backsim=U+223D +backsimeq=U+22CD +barvee=U+22BD +barwed=U+2305 +barwedge=U+2305 +bbrk=U+23B5 +bbrktbrk=U+23B6 +bcong=U+224C +bcy=U+0431 +bdquo=U+201E +becaus=U+2235 +because=U+2235 +bemptyv=U+29B0 +bepsi=U+03F6 +bernou=U+212C +beta=U+03B2 +beth=U+2136 +between=U+226C +bfr=U+1D51F +bigcap=U+22C2 +bigcirc=U+25EF +bigcup=U+22C3 +bigodot=U+2A00 +bigoplus=U+2A01 +bigotimes=U+2A02 +bigsqcup=U+2A06 +bigstar=U+2605 +bigtriangledown=U+25BD +bigtriangleup=U+25B3 +biguplus=U+2A04 +bigvee=U+22C1 +bigwedge=U+22C0 +bkarow=U+290D +blacklozenge=U+29EB +blacksquare=U+25AA +blacktriangle=U+25B4 +blacktriangledown=U+25BE +blacktriangleleft=U+25C2 +blacktriangleright=U+25B8 +blank=U+2423 +blk12=U+2592 +blk14=U+2591 +blk34=U+2593 +block=U+2588 +bne=U+003D,U+20E5 +bnequiv=U+2261,U+20E5 +bnot=U+2310 +bopf=U+1D553 +bot=U+22A5 +bottom=U+22A5 +bowtie=U+22C8 +boxDL=U+2557 +boxDR=U+2554 +boxDl=U+2556 +boxDr=U+2553 +boxH=U+2550 +boxHD=U+2566 +boxHU=U+2569 +boxHd=U+2564 +boxHu=U+2567 +boxUL=U+255D +boxUR=U+255A +boxUl=U+255C +boxUr=U+2559 +boxV=U+2551 +boxVH=U+256C +boxVL=U+2563 +boxVR=U+2560 +boxVh=U+256B +boxVl=U+2562 +boxVr=U+255F +boxbox=U+29C9 +boxdL=U+2555 +boxdR=U+2552 +boxdl=U+2510 +boxdr=U+250C +boxh=U+2500 +boxhD=U+2565 +boxhU=U+2568 +boxhd=U+252C +boxhu=U+2534 +boxminus=U+229F +boxplus=U+229E +boxtimes=U+22A0 +boxuL=U+255B +boxuR=U+2558 +boxul=U+2518 +boxur=U+2514 +boxv=U+2502 +boxvH=U+256A +boxvL=U+2561 +boxvR=U+255E +boxvh=U+253C +boxvl=U+2524 +boxvr=U+251C +bprime=U+2035 +breve=U+02D8 +brvbar=U+00A6 +brvbar=U+00A6 +bscr=U+1D4B7 +bsemi=U+204F +bsim=U+223D +bsime=U+22CD +bsol=U+005C +bsolb=U+29C5 +bsolhsub=U+27C8 +bull=U+2022 +bullet=U+2022 +bump=U+224E +bumpE=U+2AAE +bumpe=U+224F +bumpeq=U+224F +cacute=U+0107 +cap=U+2229 +capand=U+2A44 +capbrcup=U+2A49 +capcap=U+2A4B +capcup=U+2A47 +capdot=U+2A40 +caps=U+2229,U+FE00 +caret=U+2041 +caron=U+02C7 +ccaps=U+2A4D +ccaron=U+010D +ccedil=U+00E7 +ccedil=U+00E7 +ccirc=U+0109 +ccups=U+2A4C +ccupssm=U+2A50 +cdot=U+010B +cedil=U+00B8 +cedil=U+00B8 +cemptyv=U+29B2 +cent=U+00A2 +cent=U+00A2 +centerdot=U+00B7 +cfr=U+1D520 +chcy=U+0447 +check=U+2713 +checkmark=U+2713 +chi=U+03C7 +cir=U+25CB +cirE=U+29C3 +circ=U+02C6 +circeq=U+2257 +circlearrowleft=U+21BA +circlearrowright=U+21BB +circledR=U+00AE +circledS=U+24C8 +circledast=U+229B +circledcirc=U+229A +circleddash=U+229D +cire=U+2257 +cirfnint=U+2A10 +cirmid=U+2AEF +cirscir=U+29C2 +clubs=U+2663 +clubsuit=U+2663 +colon=U+003A +colone=U+2254 +coloneq=U+2254 +comma=U+002C +commat=U+0040 +comp=U+2201 +compfn=U+2218 +complement=U+2201 +complexes=U+2102 +cong=U+2245 +congdot=U+2A6D +conint=U+222E +copf=U+1D554 +coprod=U+2210 +copy=U+00A9 +copy=U+00A9 +copysr=U+2117 +crarr=U+21B5 +cross=U+2717 +cscr=U+1D4B8 +csub=U+2ACF +csube=U+2AD1 +csup=U+2AD0 +csupe=U+2AD2 +ctdot=U+22EF +cudarrl=U+2938 +cudarrr=U+2935 +cuepr=U+22DE +cuesc=U+22DF +cularr=U+21B6 +cularrp=U+293D +cup=U+222A +cupbrcap=U+2A48 +cupcap=U+2A46 +cupcup=U+2A4A +cupdot=U+228D +cupor=U+2A45 +cups=U+222A,U+FE00 +curarr=U+21B7 +curarrm=U+293C +curlyeqprec=U+22DE +curlyeqsucc=U+22DF +curlyvee=U+22CE +curlywedge=U+22CF +curren=U+00A4 +curren=U+00A4 +curvearrowleft=U+21B6 +curvearrowright=U+21B7 +cuvee=U+22CE +cuwed=U+22CF +cwconint=U+2232 +cwint=U+2231 +cylcty=U+232D +dArr=U+21D3 +dHar=U+2965 +dagger=U+2020 +daleth=U+2138 +darr=U+2193 +dash=U+2010 +dashv=U+22A3 +dbkarow=U+290F +dblac=U+02DD +dcaron=U+010F +dcy=U+0434 +dd=U+2146 +ddagger=U+2021 +ddarr=U+21CA +ddotseq=U+2A77 +deg=U+00B0 +deg=U+00B0 +delta=U+03B4 +demptyv=U+29B1 +dfisht=U+297F +dfr=U+1D521 +dharl=U+21C3 +dharr=U+21C2 +diam=U+22C4 +diamond=U+22C4 +diamondsuit=U+2666 +diams=U+2666 +die=U+00A8 +digamma=U+03DD +disin=U+22F2 +div=U+00F7 +divide=U+00F7 +divide=U+00F7 +divideontimes=U+22C7 +divonx=U+22C7 +djcy=U+0452 +dlcorn=U+231E +dlcrop=U+230D +dollar=U+0024 +dopf=U+1D555 +dot=U+02D9 +doteq=U+2250 +doteqdot=U+2251 +dotminus=U+2238 +dotplus=U+2214 +dotsquare=U+22A1 +doublebarwedge=U+2306 +downarrow=U+2193 +downdownarrows=U+21CA +downharpoonleft=U+21C3 +downharpoonright=U+21C2 +drbkarow=U+2910 +drcorn=U+231F +drcrop=U+230C +dscr=U+1D4B9 +dscy=U+0455 +dsol=U+29F6 +dstrok=U+0111 +dtdot=U+22F1 +dtri=U+25BF +dtrif=U+25BE +duarr=U+21F5 +duhar=U+296F +dwangle=U+29A6 +dzcy=U+045F +dzigrarr=U+27FF +eDDot=U+2A77 +eDot=U+2251 +eacute=U+00E9 +eacute=U+00E9 +easter=U+2A6E +ecaron=U+011B +ecir=U+2256 +ecirc=U+00EA +ecirc=U+00EA +ecolon=U+2255 +ecy=U+044D +edot=U+0117 +ee=U+2147 +efDot=U+2252 +efr=U+1D522 +eg=U+2A9A +egrave=U+00E8 +egrave=U+00E8 +egs=U+2A96 +egsdot=U+2A98 +el=U+2A99 +elinters=U+23E7 +ell=U+2113 +els=U+2A95 +elsdot=U+2A97 +emacr=U+0113 +empty=U+2205 +emptyset=U+2205 +emptyv=U+2205 +emsp13=U+2004 +emsp14=U+2005 +emsp=U+2003 +eng=U+014B +ensp=U+2002 +eogon=U+0119 +eopf=U+1D556 +epar=U+22D5 +eparsl=U+29E3 +eplus=U+2A71 +epsi=U+03B5 +epsilon=U+03B5 +epsiv=U+03F5 +eqcirc=U+2256 +eqcolon=U+2255 +eqsim=U+2242 +eqslantgtr=U+2A96 +eqslantless=U+2A95 +equals=U+003D +equest=U+225F +equiv=U+2261 +equivDD=U+2A78 +eqvparsl=U+29E5 +erDot=U+2253 +erarr=U+2971 +escr=U+212F +esdot=U+2250 +esim=U+2242 +eta=U+03B7 +eth=U+00F0 +eth=U+00F0 +euml=U+00EB +euml=U+00EB +euro=U+20AC +excl=U+0021 +exist=U+2203 +expectation=U+2130 +exponentiale=U+2147 +fallingdotseq=U+2252 +fcy=U+0444 +female=U+2640 +ffilig=U+FB03 +fflig=U+FB00 +ffllig=U+FB04 +ffr=U+1D523 +filig=U+FB01 +fjlig=U+0066,U+006A +flat=U+266D +fllig=U+FB02 +fltns=U+25B1 +fnof=U+0192 +fopf=U+1D557 +forall=U+2200 +fork=U+22D4 +forkv=U+2AD9 +fpartint=U+2A0D +frac12=U+00BD +frac12=U+00BD +frac13=U+2153 +frac14=U+00BC +frac14=U+00BC +frac15=U+2155 +frac16=U+2159 +frac18=U+215B +frac23=U+2154 +frac25=U+2156 +frac34=U+00BE +frac34=U+00BE +frac35=U+2157 +frac38=U+215C +frac45=U+2158 +frac56=U+215A +frac58=U+215D +frac78=U+215E +frasl=U+2044 +frown=U+2322 +fscr=U+1D4BB +gE=U+2267 +gEl=U+2A8C +gacute=U+01F5 +gamma=U+03B3 +gammad=U+03DD +gap=U+2A86 +gbreve=U+011F +gcirc=U+011D +gcy=U+0433 +gdot=U+0121 +ge=U+2265 +gel=U+22DB +geq=U+2265 +geqq=U+2267 +geqslant=U+2A7E +ges=U+2A7E +gescc=U+2AA9 +gesdot=U+2A80 +gesdoto=U+2A82 +gesdotol=U+2A84 +gesl=U+22DB,U+FE00 +gesles=U+2A94 +gfr=U+1D524 +gg=U+226B +ggg=U+22D9 +gimel=U+2137 +gjcy=U+0453 +gl=U+2277 +glE=U+2A92 +gla=U+2AA5 +glj=U+2AA4 +gnE=U+2269 +gnap=U+2A8A +gnapprox=U+2A8A +gne=U+2A88 +gneq=U+2A88 +gneqq=U+2269 +gnsim=U+22E7 +gopf=U+1D558 +grave=U+0060 +gscr=U+210A +gsim=U+2273 +gsime=U+2A8E +gsiml=U+2A90 +gt=U+003E +gt=U+003E +gtcc=U+2AA7 +gtcir=U+2A7A +gtdot=U+22D7 +gtlPar=U+2995 +gtquest=U+2A7C +gtrapprox=U+2A86 +gtrarr=U+2978 +gtrdot=U+22D7 +gtreqless=U+22DB +gtreqqless=U+2A8C +gtrless=U+2277 +gtrsim=U+2273 +gvertneqq=U+2269,U+FE00 +gvnE=U+2269,U+FE00 +hArr=U+21D4 +hairsp=U+200A +half=U+00BD +hamilt=U+210B +hardcy=U+044A +harr=U+2194 +harrcir=U+2948 +harrw=U+21AD +hbar=U+210F +hcirc=U+0125 +hearts=U+2665 +heartsuit=U+2665 +hellip=U+2026 +hercon=U+22B9 +hfr=U+1D525 +hksearow=U+2925 +hkswarow=U+2926 +hoarr=U+21FF +homtht=U+223B +hookleftarrow=U+21A9 +hookrightarrow=U+21AA +hopf=U+1D559 +horbar=U+2015 +hscr=U+1D4BD +hslash=U+210F +hstrok=U+0127 +hybull=U+2043 +hyphen=U+2010 +iacute=U+00ED +iacute=U+00ED +ic=U+2063 +icirc=U+00EE +icirc=U+00EE +icy=U+0438 +iecy=U+0435 +iexcl=U+00A1 +iexcl=U+00A1 +iff=U+21D4 +ifr=U+1D526 +igrave=U+00EC +igrave=U+00EC +ii=U+2148 +iiiint=U+2A0C +iiint=U+222D +iinfin=U+29DC +iiota=U+2129 +ijlig=U+0133 +imacr=U+012B +image=U+2111 +imagline=U+2110 +imagpart=U+2111 +imath=U+0131 +imof=U+22B7 +imped=U+01B5 +in=U+2208 +incare=U+2105 +infin=U+221E +infintie=U+29DD +inodot=U+0131 +int=U+222B +intcal=U+22BA +integers=U+2124 +intercal=U+22BA +intlarhk=U+2A17 +intprod=U+2A3C +iocy=U+0451 +iogon=U+012F +iopf=U+1D55A +iota=U+03B9 +iprod=U+2A3C +iquest=U+00BF +iquest=U+00BF +iscr=U+1D4BE +isin=U+2208 +isinE=U+22F9 +isindot=U+22F5 +isins=U+22F4 +isinsv=U+22F3 +isinv=U+2208 +it=U+2062 +itilde=U+0129 +iukcy=U+0456 +iuml=U+00EF +iuml=U+00EF +jcirc=U+0135 +jcy=U+0439 +jfr=U+1D527 +jmath=U+0237 +jopf=U+1D55B +jscr=U+1D4BF +jsercy=U+0458 +jukcy=U+0454 +kappa=U+03BA +kappav=U+03F0 +kcedil=U+0137 +kcy=U+043A +kfr=U+1D528 +kgreen=U+0138 +khcy=U+0445 +kjcy=U+045C +kopf=U+1D55C +kscr=U+1D4C0 +lAarr=U+21DA +lArr=U+21D0 +lAtail=U+291B +lBarr=U+290E +lE=U+2266 +lEg=U+2A8B +lHar=U+2962 +lacute=U+013A +laemptyv=U+29B4 +lagran=U+2112 +lambda=U+03BB +lang=U+27E8 +langd=U+2991 +langle=U+27E8 +lap=U+2A85 +laquo=U+00AB +laquo=U+00AB +larr=U+2190 +larrb=U+21E4 +larrbfs=U+291F +larrfs=U+291D +larrhk=U+21A9 +larrlp=U+21AB +larrpl=U+2939 +larrsim=U+2973 +larrtl=U+21A2 +lat=U+2AAB +latail=U+2919 +late=U+2AAD +lates=U+2AAD,U+FE00 +lbarr=U+290C +lbbrk=U+2772 +lbrace=U+007B +lbrack=U+005B +lbrke=U+298B +lbrksld=U+298F +lbrkslu=U+298D +lcaron=U+013E +lcedil=U+013C +lceil=U+2308 +lcub=U+007B +lcy=U+043B +ldca=U+2936 +ldquo=U+201C +ldquor=U+201E +ldrdhar=U+2967 +ldrushar=U+294B +ldsh=U+21B2 +le=U+2264 +leftarrow=U+2190 +leftarrowtail=U+21A2 +leftharpoondown=U+21BD +leftharpoonup=U+21BC +leftleftarrows=U+21C7 +leftrightarrow=U+2194 +leftrightarrows=U+21C6 +leftrightharpoons=U+21CB +leftrightsquigarrow=U+21AD +leftthreetimes=U+22CB +leg=U+22DA +leq=U+2264 +leqq=U+2266 +leqslant=U+2A7D +les=U+2A7D +lescc=U+2AA8 +lesdot=U+2A7F +lesdoto=U+2A81 +lesdotor=U+2A83 +lesg=U+22DA,U+FE00 +lesges=U+2A93 +lessapprox=U+2A85 +lessdot=U+22D6 +lesseqgtr=U+22DA +lesseqqgtr=U+2A8B +lessgtr=U+2276 +lesssim=U+2272 +lfisht=U+297C +lfloor=U+230A +lfr=U+1D529 +lg=U+2276 +lgE=U+2A91 +lhard=U+21BD +lharu=U+21BC +lharul=U+296A +lhblk=U+2584 +ljcy=U+0459 +ll=U+226A +llarr=U+21C7 +llcorner=U+231E +llhard=U+296B +lltri=U+25FA +lmidot=U+0140 +lmoust=U+23B0 +lmoustache=U+23B0 +lnE=U+2268 +lnap=U+2A89 +lnapprox=U+2A89 +lne=U+2A87 +lneq=U+2A87 +lneqq=U+2268 +lnsim=U+22E6 +loang=U+27EC +loarr=U+21FD +lobrk=U+27E6 +longleftarrow=U+27F5 +longleftrightarrow=U+27F7 +longmapsto=U+27FC +longrightarrow=U+27F6 +looparrowleft=U+21AB +looparrowright=U+21AC +lopar=U+2985 +lopf=U+1D55D +loplus=U+2A2D +lotimes=U+2A34 +lowast=U+2217 +lowbar=U+005F +loz=U+25CA +lozenge=U+25CA +lozf=U+29EB +lpar=U+0028 +lparlt=U+2993 +lrarr=U+21C6 +lrcorner=U+231F +lrhar=U+21CB +lrhard=U+296D +lrm=U+200E +lrtri=U+22BF +lsaquo=U+2039 +lscr=U+1D4C1 +lsh=U+21B0 +lsim=U+2272 +lsime=U+2A8D +lsimg=U+2A8F +lsqb=U+005B +lsquo=U+2018 +lsquor=U+201A +lstrok=U+0142 +lt=U+003C +lt=U+003C +ltcc=U+2AA6 +ltcir=U+2A79 +ltdot=U+22D6 +lthree=U+22CB +ltimes=U+22C9 +ltlarr=U+2976 +ltquest=U+2A7B +ltrPar=U+2996 +ltri=U+25C3 +ltrie=U+22B4 +ltrif=U+25C2 +lurdshar=U+294A +luruhar=U+2966 +lvertneqq=U+2268,U+FE00 +lvnE=U+2268,U+FE00 +mDDot=U+223A +macr=U+00AF +macr=U+00AF +male=U+2642 +malt=U+2720 +maltese=U+2720 +map=U+21A6 +mapsto=U+21A6 +mapstodown=U+21A7 +mapstoleft=U+21A4 +mapstoup=U+21A5 +marker=U+25AE +mcomma=U+2A29 +mcy=U+043C +mdash=U+2014 +measuredangle=U+2221 +mfr=U+1D52A +mho=U+2127 +micro=U+00B5 +micro=U+00B5 +mid=U+2223 +midast=U+002A +midcir=U+2AF0 +middot=U+00B7 +middot=U+00B7 +minus=U+2212 +minusb=U+229F +minusd=U+2238 +minusdu=U+2A2A +mlcp=U+2ADB +mldr=U+2026 +mnplus=U+2213 +models=U+22A7 +mopf=U+1D55E +mp=U+2213 +mscr=U+1D4C2 +mstpos=U+223E +mu=U+03BC +multimap=U+22B8 +mumap=U+22B8 +nGg=U+22D9,U+0338 +nGt=U+226B,U+20D2 +nGtv=U+226B,U+0338 +nLeftarrow=U+21CD +nLeftrightarrow=U+21CE +nLl=U+22D8,U+0338 +nLt=U+226A,U+20D2 +nLtv=U+226A,U+0338 +nRightarrow=U+21CF +nVDash=U+22AF +nVdash=U+22AE +nabla=U+2207 +nacute=U+0144 +nang=U+2220,U+20D2 +nap=U+2249 +napE=U+2A70,U+0338 +napid=U+224B,U+0338 +napos=U+0149 +napprox=U+2249 +natur=U+266E +natural=U+266E +naturals=U+2115 +nbsp=U+00A0 +nbsp=U+00A0 +nbump=U+224E,U+0338 +nbumpe=U+224F,U+0338 +ncap=U+2A43 +ncaron=U+0148 +ncedil=U+0146 +ncong=U+2247 +ncongdot=U+2A6D,U+0338 +ncup=U+2A42 +ncy=U+043D +ndash=U+2013 +ne=U+2260 +neArr=U+21D7 +nearhk=U+2924 +nearr=U+2197 +nearrow=U+2197 +nedot=U+2250,U+0338 +nequiv=U+2262 +nesear=U+2928 +nesim=U+2242,U+0338 +nexist=U+2204 +nexists=U+2204 +nfr=U+1D52B +ngE=U+2267,U+0338 +nge=U+2271 +ngeq=U+2271 +ngeqq=U+2267,U+0338 +ngeqslant=U+2A7E,U+0338 +nges=U+2A7E,U+0338 +ngsim=U+2275 +ngt=U+226F +ngtr=U+226F +nhArr=U+21CE +nharr=U+21AE +nhpar=U+2AF2 +ni=U+220B +nis=U+22FC +nisd=U+22FA +niv=U+220B +njcy=U+045A +nlArr=U+21CD +nlE=U+2266,U+0338 +nlarr=U+219A +nldr=U+2025 +nle=U+2270 +nleftarrow=U+219A +nleftrightarrow=U+21AE +nleq=U+2270 +nleqq=U+2266,U+0338 +nleqslant=U+2A7D,U+0338 +nles=U+2A7D,U+0338 +nless=U+226E +nlsim=U+2274 +nlt=U+226E +nltri=U+22EA +nltrie=U+22EC +nmid=U+2224 +nopf=U+1D55F +not=U+00AC +not=U+00AC +notin=U+2209 +notinE=U+22F9,U+0338 +notindot=U+22F5,U+0338 +notinva=U+2209 +notinvb=U+22F7 +notinvc=U+22F6 +notni=U+220C +notniva=U+220C +notnivb=U+22FE +notnivc=U+22FD +npar=U+2226 +nparallel=U+2226 +nparsl=U+2AFD,U+20E5 +npart=U+2202,U+0338 +npolint=U+2A14 +npr=U+2280 +nprcue=U+22E0 +npre=U+2AAF,U+0338 +nprec=U+2280 +npreceq=U+2AAF,U+0338 +nrArr=U+21CF +nrarr=U+219B +nrarrc=U+2933,U+0338 +nrarrw=U+219D,U+0338 +nrightarrow=U+219B +nrtri=U+22EB +nrtrie=U+22ED +nsc=U+2281 +nsccue=U+22E1 +nsce=U+2AB0,U+0338 +nscr=U+1D4C3 +nshortmid=U+2224 +nshortparallel=U+2226 +nsim=U+2241 +nsime=U+2244 +nsimeq=U+2244 +nsmid=U+2224 +nspar=U+2226 +nsqsube=U+22E2 +nsqsupe=U+22E3 +nsub=U+2284 +nsubE=U+2AC5,U+0338 +nsube=U+2288 +nsubset=U+2282,U+20D2 +nsubseteq=U+2288 +nsubseteqq=U+2AC5,U+0338 +nsucc=U+2281 +nsucceq=U+2AB0,U+0338 +nsup=U+2285 +nsupE=U+2AC6,U+0338 +nsupe=U+2289 +nsupset=U+2283,U+20D2 +nsupseteq=U+2289 +nsupseteqq=U+2AC6,U+0338 +ntgl=U+2279 +ntilde=U+00F1 +ntilde=U+00F1 +ntlg=U+2278 +ntriangleleft=U+22EA +ntrianglelefteq=U+22EC +ntriangleright=U+22EB +ntrianglerighteq=U+22ED +nu=U+03BD +num=U+0023 +numero=U+2116 +numsp=U+2007 +nvDash=U+22AD +nvHarr=U+2904 +nvap=U+224D,U+20D2 +nvdash=U+22AC +nvge=U+2265,U+20D2 +nvgt=U+003E,U+20D2 +nvinfin=U+29DE +nvlArr=U+2902 +nvle=U+2264,U+20D2 +nvlt=U+003C,U+20D2 +nvltrie=U+22B4,U+20D2 +nvrArr=U+2903 +nvrtrie=U+22B5,U+20D2 +nvsim=U+223C,U+20D2 +nwArr=U+21D6 +nwarhk=U+2923 +nwarr=U+2196 +nwarrow=U+2196 +nwnear=U+2927 +oS=U+24C8 +oacute=U+00F3 +oacute=U+00F3 +oast=U+229B +ocir=U+229A +ocirc=U+00F4 +ocirc=U+00F4 +ocy=U+043E +odash=U+229D +odblac=U+0151 +odiv=U+2A38 +odot=U+2299 +odsold=U+29BC +oelig=U+0153 +ofcir=U+29BF +ofr=U+1D52C +ogon=U+02DB +ograve=U+00F2 +ograve=U+00F2 +ogt=U+29C1 +ohbar=U+29B5 +ohm=U+03A9 +oint=U+222E +olarr=U+21BA +olcir=U+29BE +olcross=U+29BB +oline=U+203E +olt=U+29C0 +omacr=U+014D +omega=U+03C9 +omicron=U+03BF +omid=U+29B6 +ominus=U+2296 +oopf=U+1D560 +opar=U+29B7 +operp=U+29B9 +oplus=U+2295 +or=U+2228 +orarr=U+21BB +ord=U+2A5D +order=U+2134 +orderof=U+2134 +ordf=U+00AA +ordf=U+00AA +ordm=U+00BA +ordm=U+00BA +origof=U+22B6 +oror=U+2A56 +orslope=U+2A57 +orv=U+2A5B +oscr=U+2134 +oslash=U+00F8 +oslash=U+00F8 +osol=U+2298 +otilde=U+00F5 +otilde=U+00F5 +otimes=U+2297 +otimesas=U+2A36 +ouml=U+00F6 +ouml=U+00F6 +ovbar=U+233D +par=U+2225 +para=U+00B6 +para=U+00B6 +parallel=U+2225 +parsim=U+2AF3 +parsl=U+2AFD +part=U+2202 +pcy=U+043F +percnt=U+0025 +period=U+002E +permil=U+2030 +perp=U+22A5 +pertenk=U+2031 +pfr=U+1D52D +phi=U+03C6 +phiv=U+03D5 +phmmat=U+2133 +phone=U+260E +pi=U+03C0 +pitchfork=U+22D4 +piv=U+03D6 +planck=U+210F +planckh=U+210E +plankv=U+210F +plus=U+002B +plusacir=U+2A23 +plusb=U+229E +pluscir=U+2A22 +plusdo=U+2214 +plusdu=U+2A25 +pluse=U+2A72 +plusmn=U+00B1 +plusmn=U+00B1 +plussim=U+2A26 +plustwo=U+2A27 +pm=U+00B1 +pointint=U+2A15 +popf=U+1D561 +pound=U+00A3 +pound=U+00A3 +pr=U+227A +prE=U+2AB3 +prap=U+2AB7 +prcue=U+227C +pre=U+2AAF +prec=U+227A +precapprox=U+2AB7 +preccurlyeq=U+227C +preceq=U+2AAF +precnapprox=U+2AB9 +precneqq=U+2AB5 +precnsim=U+22E8 +precsim=U+227E +prime=U+2032 +primes=U+2119 +prnE=U+2AB5 +prnap=U+2AB9 +prnsim=U+22E8 +prod=U+220F +profalar=U+232E +profline=U+2312 +profsurf=U+2313 +prop=U+221D +propto=U+221D +prsim=U+227E +prurel=U+22B0 +pscr=U+1D4C5 +psi=U+03C8 +puncsp=U+2008 +qfr=U+1D52E +qint=U+2A0C +qopf=U+1D562 +qprime=U+2057 +qscr=U+1D4C6 +quaternions=U+210D +quatint=U+2A16 +quest=U+003F +questeq=U+225F +quot=U+0022 +quot=U+0022 +rAarr=U+21DB +rArr=U+21D2 +rAtail=U+291C +rBarr=U+290F +rHar=U+2964 +race=U+223D,U+0331 +racute=U+0155 +radic=U+221A +raemptyv=U+29B3 +rang=U+27E9 +rangd=U+2992 +range=U+29A5 +rangle=U+27E9 +raquo=U+00BB +raquo=U+00BB +rarr=U+2192 +rarrap=U+2975 +rarrb=U+21E5 +rarrbfs=U+2920 +rarrc=U+2933 +rarrfs=U+291E +rarrhk=U+21AA +rarrlp=U+21AC +rarrpl=U+2945 +rarrsim=U+2974 +rarrtl=U+21A3 +rarrw=U+219D +ratail=U+291A +ratio=U+2236 +rationals=U+211A +rbarr=U+290D +rbbrk=U+2773 +rbrace=U+007D +rbrack=U+005D +rbrke=U+298C +rbrksld=U+298E +rbrkslu=U+2990 +rcaron=U+0159 +rcedil=U+0157 +rceil=U+2309 +rcub=U+007D +rcy=U+0440 +rdca=U+2937 +rdldhar=U+2969 +rdquo=U+201D +rdquor=U+201D +rdsh=U+21B3 +real=U+211C +realine=U+211B +realpart=U+211C +reals=U+211D +rect=U+25AD +reg=U+00AE +reg=U+00AE +rfisht=U+297D +rfloor=U+230B +rfr=U+1D52F +rhard=U+21C1 +rharu=U+21C0 +rharul=U+296C +rho=U+03C1 +rhov=U+03F1 +rightarrow=U+2192 +rightarrowtail=U+21A3 +rightharpoondown=U+21C1 +rightharpoonup=U+21C0 +rightleftarrows=U+21C4 +rightleftharpoons=U+21CC +rightrightarrows=U+21C9 +rightsquigarrow=U+219D +rightthreetimes=U+22CC +ring=U+02DA +risingdotseq=U+2253 +rlarr=U+21C4 +rlhar=U+21CC +rlm=U+200F +rmoust=U+23B1 +rmoustache=U+23B1 +rnmid=U+2AEE +roang=U+27ED +roarr=U+21FE +robrk=U+27E7 +ropar=U+2986 +ropf=U+1D563 +roplus=U+2A2E +rotimes=U+2A35 +rpar=U+0029 +rpargt=U+2994 +rppolint=U+2A12 +rrarr=U+21C9 +rsaquo=U+203A +rscr=U+1D4C7 +rsh=U+21B1 +rsqb=U+005D +rsquo=U+2019 +rsquor=U+2019 +rthree=U+22CC +rtimes=U+22CA +rtri=U+25B9 +rtrie=U+22B5 +rtrif=U+25B8 +rtriltri=U+29CE +ruluhar=U+2968 +rx=U+211E +sacute=U+015B +sbquo=U+201A +sc=U+227B +scE=U+2AB4 +scap=U+2AB8 +scaron=U+0161 +sccue=U+227D +sce=U+2AB0 +scedil=U+015F +scirc=U+015D +scnE=U+2AB6 +scnap=U+2ABA +scnsim=U+22E9 +scpolint=U+2A13 +scsim=U+227F +scy=U+0441 +sdot=U+22C5 +sdotb=U+22A1 +sdote=U+2A66 +seArr=U+21D8 +searhk=U+2925 +searr=U+2198 +searrow=U+2198 +sect=U+00A7 +sect=U+00A7 +semi=U+003B +seswar=U+2929 +setminus=U+2216 +setmn=U+2216 +sext=U+2736 +sfr=U+1D530 +sfrown=U+2322 +sharp=U+266F +shchcy=U+0449 +shcy=U+0448 +shortmid=U+2223 +shortparallel=U+2225 +shy=U+00AD +shy=U+00AD +sigma=U+03C3 +sigmaf=U+03C2 +sigmav=U+03C2 +sim=U+223C +simdot=U+2A6A +sime=U+2243 +simeq=U+2243 +simg=U+2A9E +simgE=U+2AA0 +siml=U+2A9D +simlE=U+2A9F +simne=U+2246 +simplus=U+2A24 +simrarr=U+2972 +slarr=U+2190 +smallsetminus=U+2216 +smashp=U+2A33 +smeparsl=U+29E4 +smid=U+2223 +smile=U+2323 +smt=U+2AAA +smte=U+2AAC +smtes=U+2AAC,U+FE00 +softcy=U+044C +sol=U+002F +solb=U+29C4 +solbar=U+233F +sopf=U+1D564 +spades=U+2660 +spadesuit=U+2660 +spar=U+2225 +sqcap=U+2293 +sqcaps=U+2293,U+FE00 +sqcup=U+2294 +sqcups=U+2294,U+FE00 +sqsub=U+228F +sqsube=U+2291 +sqsubset=U+228F +sqsubseteq=U+2291 +sqsup=U+2290 +sqsupe=U+2292 +sqsupset=U+2290 +sqsupseteq=U+2292 +squ=U+25A1 +square=U+25A1 +squarf=U+25AA +squf=U+25AA +srarr=U+2192 +sscr=U+1D4C8 +ssetmn=U+2216 +ssmile=U+2323 +sstarf=U+22C6 +star=U+2606 +starf=U+2605 +straightepsilon=U+03F5 +straightphi=U+03D5 +strns=U+00AF +sub=U+2282 +subE=U+2AC5 +subdot=U+2ABD +sube=U+2286 +subedot=U+2AC3 +submult=U+2AC1 +subnE=U+2ACB +subne=U+228A +subplus=U+2ABF +subrarr=U+2979 +subset=U+2282 +subseteq=U+2286 +subseteqq=U+2AC5 +subsetneq=U+228A +subsetneqq=U+2ACB +subsim=U+2AC7 +subsub=U+2AD5 +subsup=U+2AD3 +succ=U+227B +succapprox=U+2AB8 +succcurlyeq=U+227D +succeq=U+2AB0 +succnapprox=U+2ABA +succneqq=U+2AB6 +succnsim=U+22E9 +succsim=U+227F +sum=U+2211 +sung=U+266A +sup1=U+00B9 +sup1=U+00B9 +sup2=U+00B2 +sup2=U+00B2 +sup3=U+00B3 +sup3=U+00B3 +sup=U+2283 +supE=U+2AC6 +supdot=U+2ABE +supdsub=U+2AD8 +supe=U+2287 +supedot=U+2AC4 +suphsol=U+27C9 +suphsub=U+2AD7 +suplarr=U+297B +supmult=U+2AC2 +supnE=U+2ACC +supne=U+228B +supplus=U+2AC0 +supset=U+2283 +supseteq=U+2287 +supseteqq=U+2AC6 +supsetneq=U+228B +supsetneqq=U+2ACC +supsim=U+2AC8 +supsub=U+2AD4 +supsup=U+2AD6 +swArr=U+21D9 +swarhk=U+2926 +swarr=U+2199 +swarrow=U+2199 +swnwar=U+292A +szlig=U+00DF +szlig=U+00DF +target=U+2316 +tau=U+03C4 +tbrk=U+23B4 +tcaron=U+0165 +tcedil=U+0163 +tcy=U+0442 +tdot=U+20DB +telrec=U+2315 +tfr=U+1D531 +there4=U+2234 +therefore=U+2234 +theta=U+03B8 +thetasym=U+03D1 +thetav=U+03D1 +thickapprox=U+2248 +thicksim=U+223C +thinsp=U+2009 +thkap=U+2248 +thksim=U+223C +thorn=U+00FE +thorn=U+00FE +tilde=U+02DC +times=U+00D7 +times=U+00D7 +timesb=U+22A0 +timesbar=U+2A31 +timesd=U+2A30 +tint=U+222D +toea=U+2928 +top=U+22A4 +topbot=U+2336 +topcir=U+2AF1 +topf=U+1D565 +topfork=U+2ADA +tosa=U+2929 +tprime=U+2034 +trade=U+2122 +triangle=U+25B5 +triangledown=U+25BF +triangleleft=U+25C3 +trianglelefteq=U+22B4 +triangleq=U+225C +triangleright=U+25B9 +trianglerighteq=U+22B5 +tridot=U+25EC +trie=U+225C +triminus=U+2A3A +triplus=U+2A39 +trisb=U+29CD +tritime=U+2A3B +trpezium=U+23E2 +tscr=U+1D4C9 +tscy=U+0446 +tshcy=U+045B +tstrok=U+0167 +twixt=U+226C +twoheadleftarrow=U+219E +twoheadrightarrow=U+21A0 +uArr=U+21D1 +uHar=U+2963 +uacute=U+00FA +uacute=U+00FA +uarr=U+2191 +ubrcy=U+045E +ubreve=U+016D +ucirc=U+00FB +ucirc=U+00FB +ucy=U+0443 +udarr=U+21C5 +udblac=U+0171 +udhar=U+296E +ufisht=U+297E +ufr=U+1D532 +ugrave=U+00F9 +ugrave=U+00F9 +uharl=U+21BF +uharr=U+21BE +uhblk=U+2580 +ulcorn=U+231C +ulcorner=U+231C +ulcrop=U+230F +ultri=U+25F8 +umacr=U+016B +uml=U+00A8 +uml=U+00A8 +uogon=U+0173 +uopf=U+1D566 +uparrow=U+2191 +updownarrow=U+2195 +upharpoonleft=U+21BF +upharpoonright=U+21BE +uplus=U+228E +upsi=U+03C5 +upsih=U+03D2 +upsilon=U+03C5 +upuparrows=U+21C8 +urcorn=U+231D +urcorner=U+231D +urcrop=U+230E +uring=U+016F +urtri=U+25F9 +uscr=U+1D4CA +utdot=U+22F0 +utilde=U+0169 +utri=U+25B5 +utrif=U+25B4 +uuarr=U+21C8 +uuml=U+00FC +uuml=U+00FC +uwangle=U+29A7 +vArr=U+21D5 +vBar=U+2AE8 +vBarv=U+2AE9 +vDash=U+22A8 +vangrt=U+299C +varepsilon=U+03F5 +varkappa=U+03F0 +varnothing=U+2205 +varphi=U+03D5 +varpi=U+03D6 +varpropto=U+221D +varr=U+2195 +varrho=U+03F1 +varsigma=U+03C2 +varsubsetneq=U+228A,U+FE00 +varsubsetneqq=U+2ACB,U+FE00 +varsupsetneq=U+228B,U+FE00 +varsupsetneqq=U+2ACC,U+FE00 +vartheta=U+03D1 +vartriangleleft=U+22B2 +vartriangleright=U+22B3 +vcy=U+0432 +vdash=U+22A2 +vee=U+2228 +veebar=U+22BB +veeeq=U+225A +vellip=U+22EE +verbar=U+007C +vert=U+007C +vfr=U+1D533 +vltri=U+22B2 +vnsub=U+2282,U+20D2 +vnsup=U+2283,U+20D2 +vopf=U+1D567 +vprop=U+221D +vrtri=U+22B3 +vscr=U+1D4CB +vsubnE=U+2ACB,U+FE00 +vsubne=U+228A,U+FE00 +vsupnE=U+2ACC,U+FE00 +vsupne=U+228B,U+FE00 +vzigzag=U+299A +wcirc=U+0175 +wedbar=U+2A5F +wedge=U+2227 +wedgeq=U+2259 +weierp=U+2118 +wfr=U+1D534 +wopf=U+1D568 +wp=U+2118 +wr=U+2240 +wreath=U+2240 +wscr=U+1D4CC +xcap=U+22C2 +xcirc=U+25EF +xcup=U+22C3 +xdtri=U+25BD +xfr=U+1D535 +xhArr=U+27FA +xharr=U+27F7 +xi=U+03BE +xlArr=U+27F8 +xlarr=U+27F5 +xmap=U+27FC +xnis=U+22FB +xodot=U+2A00 +xopf=U+1D569 +xoplus=U+2A01 +xotime=U+2A02 +xrArr=U+27F9 +xrarr=U+27F6 +xscr=U+1D4CD +xsqcup=U+2A06 +xuplus=U+2A04 +xutri=U+25B3 +xvee=U+22C1 +xwedge=U+22C0 +yacute=U+00FD +yacute=U+00FD +yacy=U+044F +ycirc=U+0177 +ycy=U+044B +yen=U+00A5 +yen=U+00A5 +yfr=U+1D536 +yicy=U+0457 +yopf=U+1D56A +yscr=U+1D4CE +yucy=U+044E +yuml=U+00FF +yuml=U+00FF +zacute=U+017A +zcaron=U+017E +zcy=U+0437 +zdot=U+017C +zeetrf=U+2128 +zeta=U+03B6 +zfr=U+1D537 +zhcy=U+0436 +zigrarr=U+21DD +zopf=U+1D56B +zscr=U+1D4CF +zwj=U+200D +zwnj=U+200C diff --git a/exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java b/exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java new file mode 100644 index 00000000000..f1708e31ea1 --- /dev/null +++ b/exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java @@ -0,0 +1,220 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.util.serializer; + +import org.exist.EXistException; +import org.exist.storage.BrokerPool; +import org.exist.storage.DBBroker; +import org.exist.storage.serializers.EXistOutputKeys; +import org.exist.test.ExistEmbeddedServer; +import org.exist.security.PermissionDeniedException; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQuery; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.Sequence; +import org.junit.ClassRule; +import org.junit.Test; +import org.xml.sax.SAXException; + +import javax.xml.transform.OutputKeys; +import java.io.StringWriter; +import java.util.Properties; + +import static org.junit.Assert.*; + +/** + * Tests that HTML5 serialization does not emit DOCTYPE for fragments + * (non-html root elements). + */ +public class HTML5FragmentTest { + + @ClassRule + public static final ExistEmbeddedServer existEmbeddedServer = new ExistEmbeddedServer(true, true); + + private String serialize(final String xquery, final String method, final String version) + throws EXistException, XPathException, SAXException, PermissionDeniedException { + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try (final DBBroker broker = pool.get(java.util.Optional.empty())) { + final XQuery xqueryService = pool.getXQueryService(); + final XQueryContext context = new XQueryContext(pool); + final Sequence result = xqueryService.execute(broker, xquery, null); + + final Properties props = new Properties(); + props.setProperty(OutputKeys.METHOD, method); + props.setProperty(OutputKeys.INDENT, "no"); + props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + if (version != null) { + props.setProperty(OutputKeys.VERSION, version); + } + props.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "yes"); + + final StringWriter writer = new StringWriter(); + final XQuerySerializer serializer = new XQuerySerializer(broker, props, writer); + serializer.serialize(result); + return writer.toString(); + } + } + + @Test + public void htmlDocumentGetsDoctype() throws Exception { + final String result = serialize("

hello

", "html", "5.0"); + assertTrue("HTML document should have DOCTYPE: " + result, + result.contains("")); + } + + @Test + public void htmlFragmentNoDoctype() throws Exception { + final String result = serialize("

hello

", "html", "5.0"); + assertFalse("HTML fragment should NOT have DOCTYPE: " + result, + result.contains("hello

")); + } + + @Test + public void htmlFragmentDivNoDoctype() throws Exception { + final String result = serialize("
text
", "html", "5.0"); + assertFalse("HTML div fragment should NOT have DOCTYPE: " + result, + result.contains("item", "html", "5.0"); + assertFalse("HTML li fragment should NOT have DOCTYPE: " + result, + result.contains("

hello

", + "xhtml", "5.0"); + assertTrue("XHTML document should have DOCTYPE: " + result, + result.contains("")); + } + + @Test + public void xhtmlFragmentNoDoctype() throws Exception { + final String result = serialize( + "

hello

", + "xhtml", "5.0"); + assertFalse("XHTML fragment should NOT have DOCTYPE: " + result, + result.contains("
  • One

"; + final Sequence result = xqueryService.execute(broker, xquery, null); + + final Properties props = new Properties(); + props.setProperty(OutputKeys.METHOD, "html"); + props.setProperty(OutputKeys.INDENT, "yes"); + props.setProperty(OutputKeys.VERSION, "5.0"); + props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + props.setProperty("suppress-indentation", "li td"); + props.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "yes"); + + final StringWriter writer = new StringWriter(); + final XQuerySerializer serializer = new XQuerySerializer(broker, props, writer); + serializer.serialize(result); + final String output = writer.toString(); + + // li should NOT have indentation inside it + assertTrue("li content should not be indented: " + output, + output.contains("
  • One

  • ")); + } + } + + @Test + public void htmlSuppressIndentationViaFnSerialize() throws Exception { + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try (final DBBroker broker = pool.get(java.util.Optional.empty())) { + final XQuery xqueryService = pool.getXQueryService(); + // Use fn:serialize with suppress-indentation — pass QNames, not string + final String xquery = + "serialize(
    • One

    , " + + "map { 'method': 'html', 'indent': true(), 'version': '5.0', " + + "'suppress-indentation': (xs:QName('li'), xs:QName('td')) })"; + final Sequence result = xqueryService.execute(broker, xquery, null); + final String output = result.getStringValue(); + + // li should NOT have indentation inside it + assertTrue("li content should not be indented via fn:serialize: " + output, + output.contains("
  • One

  • ")); + } + } + + @Test + public void htmlCdataSectionElementsSuppressed() throws Exception { + // For HTML method, cdata-section-elements should be IGNORED + // Text should not be wrapped in CDATA markers + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try (final DBBroker broker = pool.get(java.util.Optional.empty())) { + final XQuery xqueryService = pool.getXQueryService(); + final String xquery = "

    No CDATA

    "; + final Sequence result = xqueryService.execute(broker, xquery, null); + + final Properties props = new Properties(); + props.setProperty(OutputKeys.METHOD, "html"); + props.setProperty(OutputKeys.INDENT, "no"); + props.setProperty(OutputKeys.VERSION, "5.0"); + props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + props.setProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "b"); + props.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "yes"); + + final StringWriter writer = new StringWriter(); + final XQuerySerializer serializer = new XQuerySerializer(broker, props, writer); + serializer.serialize(result); + final String output = writer.toString(); + + assertFalse("HTML output should not contain CDATA: " + output, + output.contains("No CDATA")); + } + } + + @Test + public void htmlScriptAttributeEscaped() throws Exception { + // In HTML5, attributes on script elements MUST be escaped + // but text content inside script elements must NOT be escaped + final String result = serialize("", + "html", "5.0"); + assertTrue("Script attribute & should be escaped: " + result, + result.contains("language=\"Jack&Jill\"")); + assertTrue("Script body && should NOT be escaped: " + result, + result.contains("go && run()")); + } + + @Test + public void html40NoDoctypeWithoutPublicSystem() throws Exception { + // HTML 4.0 without doctype-public/doctype-system should not emit DOCTYPE + final String result = serialize("

    hello

    ", "html", "4.0"); + assertFalse("HTML 4.0 without public/system should NOT have DOCTYPE: " + result, + result.contains("
  • Scotland
    • Highlands
      • Fort William
      • Inverness
    • Lowlands
      • Glasgow
  • ") + %test:assertEquals("
    • Scotland
      • Highlands
        • Inverness
        • Fort William
      • Lowlands
        • Glasgow
    ") function mt:nested-map-for-each() {