diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java index e298e084cc8..d20eed4921e 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java @@ -21,6 +21,8 @@ */ package org.exist.xquery.functions.fn; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.exist.collections.Collection; import org.exist.dom.QName; import org.exist.dom.persistent.DefaultDocumentSet; @@ -38,10 +40,18 @@ import org.exist.xquery.functions.xmldb.XMLDBModule; import org.exist.xquery.value.*; +import java.io.IOException; +import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Iterator; +import org.exist.xquery.util.DocUtils; + import static org.exist.xquery.FunctionDSL.*; /** @@ -49,6 +59,8 @@ */ public class ExtCollection extends BasicFunction { + private static final Logger LOG = LogManager.getLogger(ExtCollection.class); + private static final String FS_COLLECTION_NAME = "collection"; static final FunctionSignature[] FS_COLLECTION = functionSignatures( new QName(FS_COLLECTION_NAME, Function.BUILTIN_FUNCTION_NS), @@ -120,6 +132,10 @@ private void getCollectionItems(final URI collectionUri, final Sequence items) t if (dynamicCollection != null) { items.addAll(dynamicCollection); + } else if ("file".equals(collectionUri.getScheme())) { + // file: URI — scan directory for XML files + getFileCollectionItems(collectionUri, items); + } else { final MutableDocumentSet ndocs = new DefaultDocumentSet(); final XmldbURI uri = XmldbURI.create(collectionUri); @@ -147,6 +163,54 @@ private void getCollectionItems(final URI collectionUri, final Sequence items) t } } + /** + * 
Scan a file: URI directory for XML files and parse them into in-memory documents. + * Supports an optional ?select=glob query parameter for filtering (e.g., ?select=*.xml). + * Only DBA users can access the file system directly. + */ + private void getFileCollectionItems(final URI collectionUri, final Sequence items) throws XPathException { + // Security: only DBA users can access file: URIs + if (!context.getBroker().getCurrentSubject().hasDbaRole()) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Permission denied: only DBA users can access file: URIs in fn:collection()"); + } + + // Extract path and optional ?select= glob filter + final String filePath = collectionUri.getPath(); + final String query = collectionUri.getQuery(); + String globPattern = "*.xml"; // default: XML files only + if (query != null && query.startsWith("select=")) { + globPattern = query.substring("select=".length()); + } + + final Path dir = Paths.get(filePath); + if (!Files.isDirectory(dir)) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Directory does not exist: " + filePath); + } + + try (final DirectoryStream stream = Files.newDirectoryStream(dir, globPattern)) { + for (final Path file : stream) { + if (Files.isRegularFile(file) && Files.isReadable(file)) { + try (final InputStream is = Files.newInputStream(file)) { + final org.exist.dom.memtree.DocumentImpl doc = + DocUtils.parse(context, is, this); + doc.setDocumentURI(file.toUri().toString()); + items.add(doc); + } catch (final XPathException | IOException e) { + // Skip non-parseable files (they may not be well-formed XML) + if (LOG.isDebugEnabled()) { + LOG.debug("Skipping non-parseable file in collection: {}", file, e); + } + } + } + } + } catch (final IOException e) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Error reading directory: " + e.getMessage()); + } + } + protected URI asUri(final String path) throws XPathException { try { URI uri = new URI(path); diff --git 
a/exist-core/src/test/xquery/xquery3/fnCollectionFileUri.xql b/exist-core/src/test/xquery/xquery3/fnCollectionFileUri.xql
new file mode 100644
index 00000000000..2e6dfca36a8
--- /dev/null
+++ b/exist-core/src/test/xquery/xquery3/fnCollectionFileUri.xql
@@ -0,0 +1,36 @@
+(:
+ : eXist-db Open Source Native XML Database
+ : Copyright (C) 2001 The eXist-db Authors
+ :
+ : info@exist-db.org
+ : http://www.exist-db.org
+ :
+ : This library is free software; you can redistribute it and/or
+ : modify it under the terms of the GNU Lesser General Public
+ : License as published by the Free Software Foundation; either
+ : version 2.1 of the License, or (at your option) any later version.
+ :
+ : This library is distributed in the hope that it will be useful,
+ : but WITHOUT ANY WARRANTY; without even the implied warranty of
+ : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ : Lesser General Public License for more details.
+ :
+ : You should have received a copy of the GNU Lesser General Public
+ : License along with this library; if not, write to the Free Software
+ : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ :)
+xquery version "3.1";
+
+(:~
+ : XQSuite tests for fn:collection() when the collection URI uses the file: scheme.
+ :)
+module namespace cfu="http://exist-db.org/xquery/test/collection-file-uri";
+
+declare namespace test="http://exist-db.org/xquery/xqsuite";
+
+(:~ Expects error FODC0002 when the file: URI names a directory that is presumed not to exist on the test host. :)
+declare
+    %test:assertError("FODC0002")
+function cfu:collection-nonexistent-dir() {
+    collection("file:///nonexistent-dir-xyz-42-does-not-exist")
+};