diff --git a/src/main/java/org/apache/maven/plugins/clean/Cleaner.java b/src/main/java/org/apache/maven/plugins/clean/Cleaner.java index 5715981a..2ad8276e 100644 --- a/src/main/java/org/apache/maven/plugins/clean/Cleaner.java +++ b/src/main/java/org/apache/maven/plugins/clean/Cleaner.java @@ -32,6 +32,7 @@ import java.nio.file.attribute.PosixFileAttributeView; import java.nio.file.attribute.PosixFilePermission; import java.util.ArrayDeque; +import java.util.Arrays; import java.util.BitSet; import java.util.Deque; import java.util.EnumSet; @@ -105,8 +106,12 @@ final class Cleaner implements FileVisitor { @Nonnull private final String fastMode; + /** + * Combination of includes and excludes path matchers. + * A {@code null} value means to include everything. + */ @Nullable - private Selector selector; + private PathSelector selector; /** * Whether the base directory is excluded from the set of directories to delete. @@ -205,7 +210,11 @@ final class Cleaner implements FileVisitor { * @throws IOException if a file/directory could not be deleted and {@code failOnError} is {@code true} */ public void delete(@Nonnull Fileset fileset) throws IOException { - selector = new Selector(fileset); + selector = new PathSelector( + fileset.getDirectory(), + Arrays.asList(fileset.getIncludes()), + Arrays.asList(fileset.getExcludes()), + fileset.isUseDefaultExcludes()); if (selector.isEmpty()) { selector = null; } diff --git a/src/main/java/org/apache/maven/plugins/clean/Fileset.java b/src/main/java/org/apache/maven/plugins/clean/Fileset.java index 2842ec6a..de497f8c 100644 --- a/src/main/java/org/apache/maven/plugins/clean/Fileset.java +++ b/src/main/java/org/apache/maven/plugins/clean/Fileset.java @@ -41,6 +41,11 @@ public class Fileset { private boolean useDefaultExcludes; + /** + * Creates an initially empty file set. + */ + public Fileset() {} + /** * {@return the base directory}. */ @@ -96,7 +101,7 @@ public boolean isUseDefaultExcludes() { * Appends the elements of the given array in the given buffer. * This is a helper method for {@link #toString()} implementations. * - * @param buffer the buffer where to add the elements + * @param buffer the buffer to add the elements to * @param label label identifying the array of elements to add * @param patterns the elements to append, or {@code null} if none */ diff --git a/src/main/java/org/apache/maven/plugins/clean/PathSelector.java b/src/main/java/org/apache/maven/plugins/clean/PathSelector.java new file mode 100644 index 00000000..2d810829 --- /dev/null +++ b/src/main/java/org/apache/maven/plugins/clean/PathSelector.java @@ -0,0 +1,640 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.maven.plugins.clean; + +import java.io.File; +import java.nio.file.FileSystem; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Determines whether a path is selected according to include/exclude patterns. + * The pathnames used for method parameters will be relative to some base directory + * and use {@code '/'} as separator, regardless of the hosting operating system. + * + *

Syntax

+ * If a pattern contains the {@code ':'} character and the prefix before is longer than 1 character, + * then that pattern is given verbatim to {@link FileSystem#getPathMatcher(String)}, which interprets + * the part before {@code ':'} as the syntax (usually {@code "glob"} or {@code "regex"}). + * If a pattern does not contain the {@code ':'} character, or if the prefix is one character long + * (interpreted as a Windows drive), then the syntax defaults to a reproduction of the Maven 3 behavior. + * This is implemented as the {@code "glob"} syntax with the following modifications: + * + * + * + * If above changes are not desired, put an explicit {@code "glob:"} prefix before the pattern. + * Note that putting such a prefix is recommended anyway for better performances. + * + * @author Benjamin Bentmann + * @author Martin Desruisseaux + * + * @see java.nio.file.FileSystem#getPathMatcher(String) + */ +final class PathSelector implements PathMatcher { + /** + * Patterns which should be excluded by default, like SCM files. + * + *

Source: this list is copied from {@code plexus-utils-4.0.2} (released in + * September 23, 2024), class {@code org.codehaus.plexus.util.AbstractScanner}.

+ */ + private static final List DEFAULT_EXCLUDES = List.of( + // Miscellaneous typical temporary files + "**/*~", + "**/#*#", + "**/.#*", + "**/%*%", + "**/._*", + + // CVS + "**/CVS", + "**/CVS/**", + "**/.cvsignore", + + // RCS + "**/RCS", + "**/RCS/**", + + // SCCS + "**/SCCS", + "**/SCCS/**", + + // Visual SourceSafe + "**/vssver.scc", + + // MKS + "**/project.pj", + + // Subversion + "**/.svn", + "**/.svn/**", + + // Arch + "**/.arch-ids", + "**/.arch-ids/**", + + // Bazaar + "**/.bzr", + "**/.bzr/**", + + // SurroundSCM + "**/.MySCMServerInfo", + + // Mac + "**/.DS_Store", + + // Serena Dimensions Version 10 + "**/.metadata", + "**/.metadata/**", + + // Mercurial + "**/.hg", + "**/.hg/**", + + // git + "**/.git", + "**/.git/**", + "**/.gitignore", + + // BitKeeper + "**/BitKeeper", + "**/BitKeeper/**", + "**/ChangeSet", + "**/ChangeSet/**", + + // darcs + "**/_darcs", + "**/_darcs/**", + "**/.darcsrepo", + "**/.darcsrepo/**", + "**/-darcs-backup*", + "**/.darcs-temp-mail"); + + /** + * Maximum number of characters of the prefix before {@code ':'} for handling as a Maven syntax. + */ + private static final int MAVEN_SYNTAX_THRESHOLD = 1; + + /** + * The default syntax to use if none was specified. Note that when this default syntax is applied, + * the user-provided pattern get some changes as documented in class Javadoc. + */ + private static final String DEFAULT_SYNTAX = "glob:"; + + /** + * Characters having a special meaning in the glob syntax. + * + * @see FileSystem#getPathMatcher(String) + */ + private static final String SPECIAL_CHARACTERS = "*?[]{}\\"; + + /** + * A path matcher which accepts all files. + * + * @see #simplify() + */ + private static final PathMatcher INCLUDES_ALL = (path) -> true; + + /** + * String representations of the normalized include filters. + * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default. + * An empty array means to include all files. + * + * @see #toString() + */ + private final String[] includePatterns; + + /** + * String representations of the normalized exclude filters. + * Each pattern shall be prefixed by its syntax. If no syntax is specified, + * the default is a Maven 3 syntax similar, but not identical, to {@value #DEFAULT_SYNTAX}. + * This array may be longer or shorter than the user-supplied excludes, depending on whether + * default excludes have been added and whether some unnecessary excludes have been omitted. + * + * @see #toString() + */ + private final String[] excludePatterns; + + /** + * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length. + * An empty array means to include all files. + */ + private final PathMatcher[] includes; + + /** + * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length. + */ + private final PathMatcher[] excludes; + + /** + * The matcher for all directories to include. This array includes the parents of all those directories, + * because they need to be accepted before we can walk to the sub-directories. + * This is an optimization for skipping whole directories when possible. + * An empty array means to include all directories. + */ + private final PathMatcher[] dirIncludes; + + /** + * The matcher for directories to exclude. This array does not include the parent directories, + * because they may contain other sub-trees that need to be included. + * This is an optimization for skipping whole directories when possible. + */ + private final PathMatcher[] dirExcludes; + + /** + * The base directory. All files will be relativized to that directory before to be matched. + */ + private final Path baseDirectory; + + /** + * Whether paths must be relativized before to be given to a matcher. If {@code true}, then every paths + * will be made relative to {@link #baseDirectory} for allowing patterns like {@code "foo/bar/*.java"} + * to work. As a slight optimization, we can skip this step if all patterns start with {@code "**"}. + */ + private final boolean needRelativize; + + /** + * Creates a new selector from the given includes and excludes. + * + * @param directory the base directory of the files to filter + * @param includes the patterns of the files to include, or null or empty for including all files + * @param excludes the patterns of the files to exclude, or null or empty for no exclusion + * @param useDefaultExcludes whether to augment the excludes with a default set of SCM patterns + */ + PathSelector(Path directory, Collection includes, Collection excludes, boolean useDefaultExcludes) { + includePatterns = normalizePatterns(includes, false); + excludePatterns = normalizePatterns(effectiveExcludes(excludes, includePatterns, useDefaultExcludes), true); + baseDirectory = directory; + FileSystem fs = directory.getFileSystem(); + this.includes = matchers(fs, includePatterns); + this.excludes = matchers(fs, excludePatterns); + dirIncludes = matchers(fs, directoryPatterns(includePatterns, false)); + dirExcludes = matchers(fs, directoryPatterns(excludePatterns, true)); + needRelativize = needRelativize(includePatterns) || needRelativize(excludePatterns); + } + + /** + * Returns the given array of excludes, optionally expanded with a default set of excludes, + * then with unnecessary excludes omitted. An unnecessary exclude is an exclude which will never + * match a file because there is no include which would accept a file that could match the exclude. + * For example, if the only include is {@code "*.java"}, then the "**/project.pj", + * "**/.DS_Store" and other excludes will never match a file and can be omitted. + * Because the list of {@linkplain #DEFAULT_EXCLUDES default excludes} contains many elements, + * removing unnecessary excludes can reduce a lot the number of matches tested on each source file. + * + *

Implementation note

+ * The removal of unnecessary excludes is done on a best effort basis. The current implementation + * compares only the prefixes and suffixes of each pattern, keeping the pattern in case of doubt. + * This is not bad, but it does not remove all unnecessary patterns. It would be possible to do + * better in the future if benchmarking suggests that it would be worth the effort. + * + * @param excludes the user-specified excludes, potentially not yet converted to glob syntax + * @param includes the include patterns converted to glob syntax + * @param useDefaultExcludes whether to expand user exclude with the set of default excludes + * @return the potentially expanded or reduced set of excludes to use + */ + private static Collection effectiveExcludes( + Collection excludes, final String[] includes, final boolean useDefaultExcludes) { + if (excludes == null || excludes.isEmpty()) { + if (useDefaultExcludes) { + excludes = new ArrayList<>(DEFAULT_EXCLUDES); + } else { + return List.of(); + } + } else { + excludes = new ArrayList<>(excludes); + excludes.removeIf(Objects::isNull); + if (useDefaultExcludes) { + excludes.addAll(DEFAULT_EXCLUDES); + } + } + if (includes.length == 0) { + return excludes; + } + /* + * Get the prefixes and suffixes of all includes, stopping at the first special character. + * Redundant prefixes and suffixes are omitted. + */ + var prefixes = new String[includes.length]; + var suffixes = new String[includes.length]; + for (int i = 0; i < includes.length; i++) { + String include = includes[i]; + if (!include.startsWith(DEFAULT_SYNTAX)) { + return excludes; // Do not filter if at least one pattern is too complicated. + } + include = include.substring(DEFAULT_SYNTAX.length()); + prefixes[i] = prefixOrSuffix(include, false); + suffixes[i] = prefixOrSuffix(include, true); + } + prefixes = sortByLength(prefixes, false); + suffixes = sortByLength(suffixes, true); + /* + * Keep only the exclude which start with one of the prefixes and end with one of the suffixes. + * Note that a prefix or suffix may be the empty string, which match everything. + */ + final Iterator it = excludes.iterator(); + nextExclude: + while (it.hasNext()) { + final String exclude = it.next(); + final int s = exclude.indexOf(':'); + if (s <= MAVEN_SYNTAX_THRESHOLD || exclude.startsWith(DEFAULT_SYNTAX)) { + if (cannotMatch(exclude, prefixes, false) || cannotMatch(exclude, suffixes, true)) { + it.remove(); + } + } + } + return excludes; + } + + /** + * Returns the maximal amount of ordinary characters at the beginning or end of the given pattern. + * The prefix or suffix stops at the first {@linkplain #SPECIAL_CHARACTERS special character}. + * + * @param include the pattern for which to get a prefix or suffix without special character + * @param suffix {@code false} if a prefix is desired, or {@code true} if a suffix is desired + */ + private static String prefixOrSuffix(final String include, boolean suffix) { + int s = suffix ? -1 : include.length(); + for (int i = SPECIAL_CHARACTERS.length(); --i >= 0; ) { + char c = SPECIAL_CHARACTERS.charAt(i); + if (suffix) { + s = Math.max(s, include.lastIndexOf(c)); + } else { + int p = include.indexOf(c); + if (p >= 0 && p < s) { + s = p; + } + } + } + return suffix ? include.substring(s + 1) : include.substring(0, s); + } + + /** + * Returns {@code true} if the given exclude cannot match any include patterns. + * In case of doubt, returns {@code false}. + * + * @param exclude the exclude pattern to test + * @param fragments the prefixes or suffixes (fragments without special characters) of the includes + * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes + * @return {@code true} if it is certain that the exclude pattern cannot match, or {@code false} in case of doubt + */ + private static boolean cannotMatch(String exclude, final String[] fragments, final boolean suffix) { + exclude = prefixOrSuffix(exclude, suffix); + for (String fragment : fragments) { + int fg = fragment.length(); + int ex = exclude.length(); + int length = Math.min(fg, ex); + if (suffix) { + fg -= length; + ex -= length; + } else { + fg = 0; + ex = 0; + } + if (exclude.regionMatches(ex, fragment, fg, length)) { + return false; + } + } + return true; + } + + /** + * Sorts the given patterns by their length. The main intent is to have the empty string first, + * while will cause the loops testing for prefixes and suffixes to stop almost immediately. + * Short prefixes or suffixes are also more likely to be matched. + * + * @param fragments the fragments to sort in-place + * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes + * @return the given array, or a smaller array if some fragments were discarded because redundant + */ + private static String[] sortByLength(final String[] fragments, final boolean suffix) { + Arrays.sort(fragments, (s1, s2) -> s1.length() - s2.length()); + int count = 0; + /* + * Simplify the array of prefixes or suffixes by removing all redundant elements. + * An element is redundant if there is a shorter prefix or suffix with the same characters. + */ + nextBase: + for (String fragment : fragments) { + for (int i = count; --i >= 0; ) { + String base = fragments[i]; + if (suffix ? fragment.endsWith(base) : fragment.startsWith(base)) { + continue nextBase; // Skip this fragment + } + } + fragments[count++] = fragment; + } + return (fragments.length == count) ? fragments : Arrays.copyOf(fragments, count); + } + + /** + * Returns the given array of patterns with path separator normalized to {@code '/'}. + * Null or empty patterns are ignored, and duplications are removed. + * + * @param patterns the patterns to normalize + * @param excludes whether the patterns are exclude patterns + * @return normalized patterns without null, empty or duplicated patterns + */ + private static String[] normalizePatterns(final Collection patterns, final boolean excludes) { + if (patterns == null || patterns.isEmpty()) { + return new String[0]; + } + // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. + final var normalized = new LinkedHashSet(patterns.size()); + for (String pattern : patterns) { + if (pattern != null && !pattern.isEmpty()) { + if (pattern.indexOf(':') <= MAVEN_SYNTAX_THRESHOLD) { + pattern = pattern.replace(File.separatorChar, '/'); + if (pattern.endsWith("/")) { + pattern += "**"; + } + // Following are okay only when "**" means "0 or more directories". + while (pattern.endsWith("/**/**")) { + pattern = pattern.substring(0, pattern.length() - 3); + } + while (pattern.startsWith("**/**/")) { + pattern = pattern.substring(3); + } + pattern = pattern.replace("/**/**/", "/**/"); + pattern = pattern.replace("\\", "\\\\") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("{", "\\{") + .replace("}", "\\}"); + normalized.add(DEFAULT_SYNTAX + pattern); + /* + * If the pattern starts or ends with "**", Java GLOB expects a directory level at + * that location while Maven seems to consider that "**" can mean "no directory". + * Add another pattern for reproducing this effect. + */ + addPatternsWithOneDirRemoved(normalized, pattern, 0); + } else { + normalized.add(pattern); + } + } + } + return simplify(normalized, excludes); + } + + /** + * Adds all variants of the given pattern with {@code **} removed. + * This is used for simulating the Maven behavior where {@code "**} may match zero directory. + * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory. + * + * @param patterns where to add the derived patterns + * @param pattern the pattern for which to add derived forms, without the "glob:" syntax prefix + * @param end should be 0 (reserved for recursive invocations of this method) + */ + private static void addPatternsWithOneDirRemoved(final Set patterns, final String pattern, int end) { + final int length = pattern.length(); + int start; + while ((start = pattern.indexOf("**", end)) >= 0) { + end = start + 2; // 2 is the length of "**". + if (end < length) { + if (pattern.charAt(end) != '/') { + continue; + } + if (start == 0) { + end++; // Ommit the leading slash if there is nothing before it. + } + } + if (start > 0 && pattern.charAt(--start) != '/') { + continue; + } + String reduced = pattern.substring(0, start) + pattern.substring(end); + patterns.add(DEFAULT_SYNTAX + reduced); + addPatternsWithOneDirRemoved(patterns, reduced, start); + } + } + + /** + * Applies some heuristic rules for simplifying the set of patterns, + * then returns the patterns as an array. + * + * @param patterns the patterns to simplify and return as an array + * @param excludes whether the patterns are exclude patterns + * @return the set content as an array, after simplification + */ + private static String[] simplify(Set patterns, boolean excludes) { + /* + * If the "**" pattern is present, it makes all other patterns useless. + * In the case of include patterns, an empty set means to include everything. + */ + if (patterns.remove("**")) { + patterns.clear(); + if (excludes) { + patterns.add("**"); + } + } + return patterns.toArray(String[]::new); + } + + /** + * Eventually adds the parent directory of the given patterns, without duplicated values. + * The patterns given to this method should have been normalized. + * + * @param patterns the normalized include or exclude patterns + * @param excludes whether the patterns are exclude patterns + * @return pattens of directories to include or exclude + */ + private static String[] directoryPatterns(final String[] patterns, final boolean excludes) { + // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. + final var directories = new LinkedHashSet(patterns.length); + for (String pattern : patterns) { + if (pattern.startsWith(DEFAULT_SYNTAX)) { + if (excludes) { + if (pattern.endsWith("/**")) { + directories.add(pattern.substring(0, pattern.length() - 3)); + } + } else { + int s = pattern.indexOf(':'); + if (pattern.regionMatches(++s, "**/", 0, 3)) { + s = pattern.indexOf('/', s + 3); + if (s < 0) { + return new String[0]; // Pattern is "**", so we need to accept everything. + } + directories.add(pattern.substring(0, s)); + } + } + } + } + return simplify(directories, excludes); + } + + /** + * Returns {@code true} if at least one pattern requires path to be relativized before to be matched. + * + * @param patterns include or exclude patterns + * @return whether at least one pattern require relativization + */ + private static boolean needRelativize(String[] patterns) { + for (String pattern : patterns) { + if (!pattern.startsWith(DEFAULT_SYNTAX + "**/")) { + return true; + } + } + return false; + } + + /** + * Creates the path matchers for the given patterns. + * The syntax (usually {@value #DEFAULT_SYNTAX}) must be specified for each pattern. + */ + private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) { + final var matchers = new PathMatcher[patterns.length]; + for (int i = 0; i < patterns.length; i++) { + matchers[i] = fs.getPathMatcher(patterns[i]); + } + return matchers; + } + + /** + * {@return whether there is no include or exclude filters}. + * In such case, this {@code PathSelector} instance should be ignored. + */ + public boolean isEmpty() { + return (includes.length | excludes.length) == 0; + } + + /** + * {@return a potentially simpler matcher equivalent to this matcher}. + */ + @SuppressWarnings("checkstyle:MissingSwitchDefault") + public PathMatcher simplify() { + if (!needRelativize && excludes.length == 0) { + switch (includes.length) { + case 0: + return INCLUDES_ALL; + case 1: + return includes[0]; + } + } + return this; + } + + /** + * Determines whether a path is selected. + * This is true if the given file matches an include pattern and no exclude pattern. + * + * @param path the pathname to test, must not be {@code null} + * @return {@code true} if the given path is selected, {@code false} otherwise + */ + @Override + public boolean matches(Path path) { + if (needRelativize) { + path = baseDirectory.relativize(path); + } + return (includes.length == 0 || isMatched(path, includes)) + && (excludes.length == 0 || !isMatched(path, excludes)); + } + + /** + * {@return whether the given file matches according to one of the given matchers}. + */ + private static boolean isMatched(Path path, PathMatcher[] matchers) { + for (PathMatcher matcher : matchers) { + if (matcher.matches(path)) { + return true; + } + } + return false; + } + + /** + * Determines whether a directory could contain selected paths. + * + * @param directory the directory pathname to test, must not be {@code null} + * @return {@code true} if the given directory might contain selected paths, {@code false} if the + * directory will definitively not contain selected paths + */ + public boolean couldHoldSelected(Path directory) { + if (baseDirectory.equals(directory)) { + return true; + } + directory = baseDirectory.relativize(directory); + return (dirIncludes.length == 0 || isMatched(directory, dirIncludes)) + && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes)); + } + + /** + * {@return a string representation for logging purposes}. + */ + @Override + public String toString() { + var buffer = new StringBuilder(); + Fileset.append(buffer, "includes", includePatterns); + Fileset.append(buffer.append(", "), "excludes", excludePatterns); + return buffer.toString(); + } +} diff --git a/src/main/java/org/apache/maven/plugins/clean/Selector.java b/src/main/java/org/apache/maven/plugins/clean/Selector.java deleted file mode 100644 index ee840489..00000000 --- a/src/main/java/org/apache/maven/plugins/clean/Selector.java +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.maven.plugins.clean; - -import java.io.File; -import java.nio.file.FileSystem; -import java.nio.file.Path; -import java.nio.file.PathMatcher; -import java.util.LinkedHashSet; -import java.util.Set; - -/** - * Determines whether a path is selected for deletion. - * The pathnames used for method parameters will be relative to some base directory - * and use {@code '/'} as separator, regardless the hosting operating system. - * - *

Syntax

- * If a pattern contains the {@code ':'} character, then that pattern is given verbatim to - * {@link FileSystem#getPathMatcher(String)}, which will interpret the part before {@code ':'} - * as the syntax (usually {@code "glob"} or {@code "regex"}). If a pattern does not contain the - * {@code ':'} character, then the syntax defaults to a reproduction of the Maven 3 behavior. - * This is implemented as the {@code "glob"} syntax with the following modifications: - * - *
    - *
  • The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}. - * Note that it means that the backslash cannot be used for escaping characters.
  • - *
  • Trailing {@code "/"} is completed as {@code "/**"}.
  • - *
  • The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories". - * This is implemented by adding variants of the pattern without the {@code "**"} wildcard.
  • - *
- * - * If above changes are not desired, put an explicit {@code "glob:"} prefix before the patterns. - * Note that putting such prefix is recommended anyway for better performances. - * - * @author Benjamin Bentmann - * @author Martin Desruisseaux - */ -final class Selector implements PathMatcher { - /** - * Patterns which should be excluded by default, like SCM files. - * - *

Source: this list is copied from {@code plexus-utils-4.0.2} (released in - * September 23, 2024), class {@code org.codehaus.plexus.util.AbstractScanner}.

- */ - private static final String[] DEFAULT_EXCLUDES = { - // Miscellaneous typical temporary files - "**/*~", - "**/#*#", - "**/.#*", - "**/%*%", - "**/._*", - - // CVS - "**/CVS", - "**/CVS/**", - "**/.cvsignore", - - // RCS - "**/RCS", - "**/RCS/**", - - // SCCS - "**/SCCS", - "**/SCCS/**", - - // Visual SourceSafe - "**/vssver.scc", - - // MKS - "**/project.pj", - - // Subversion - "**/.svn", - "**/.svn/**", - - // Arch - "**/.arch-ids", - "**/.arch-ids/**", - - // Bazaar - "**/.bzr", - "**/.bzr/**", - - // SurroundSCM - "**/.MySCMServerInfo", - - // Mac - "**/.DS_Store", - - // Serena Dimensions Version 10 - "**/.metadata", - "**/.metadata/**", - - // Mercurial - "**/.hg", - "**/.hg/**", - - // git - "**/.git", - "**/.git/**", - "**/.gitignore", - - // BitKeeper - "**/BitKeeper", - "**/BitKeeper/**", - "**/ChangeSet", - "**/ChangeSet/**", - - // darcs - "**/_darcs", - "**/_darcs/**", - "**/.darcsrepo", - "**/.darcsrepo/**", - "**/-darcs-backup*", - "**/.darcs-temp-mail" - }; - - /** - * String representation of the normalized include filters. - * This is kept only for {@link #toString()} implementation. - */ - private final String[] includePatterns; - - /** - * String representation of the normalized exclude filters. - * This is kept only for {@link #toString()} implementation. - */ - private final String[] excludePatterns; - - /** - * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length. - */ - private final PathMatcher[] includes; - - /** - * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length. - */ - private final PathMatcher[] excludes; - - /** - * The matcher for all directories to include. This array includes the parents of all those directories, - * because they need to be accepted before we can walk to the sub-directories. - * This is an optimization for skipping whole directories when possible. - */ - private final PathMatcher[] dirIncludes; - - /** - * The matcher for directories to exclude. This array does not include the parent directories, - * since they may contain other sub-trees that need to be included. - * This is an optimization for skipping whole directories when possible. - */ - private final PathMatcher[] dirExcludes; - - /** - * The base directory. All files will be relativized to that directory before to be matched. - */ - private final Path baseDirectory; - - /** - * Creates a new selector from the given file seT. - * - * @param fs the user-specified configuration - */ - Selector(Fileset fs) { - includePatterns = normalizePatterns(fs.getIncludes(), false); - excludePatterns = normalizePatterns(addDefaultExcludes(fs.getExcludes(), fs.isUseDefaultExcludes()), true); - baseDirectory = fs.getDirectory(); - FileSystem system = baseDirectory.getFileSystem(); - includes = matchers(system, includePatterns); - excludes = matchers(system, excludePatterns); - dirIncludes = matchers(system, directoryPatterns(includePatterns, false)); - dirExcludes = matchers(system, directoryPatterns(excludePatterns, true)); - } - - /** - * Returns the given array of excludes, optionally expanded with a default set of excludes. - * - * @param excludes the user-specified excludes. - * @param useDefaultExcludes whether to expand user exclude with the set of default excludes - * @return the potentially expanded set of excludes to use - */ - private static String[] addDefaultExcludes(final String[] excludes, final boolean useDefaultExcludes) { - if (!useDefaultExcludes) { - return excludes; - } - String[] defaults = DEFAULT_EXCLUDES; - if (excludes == null || excludes.length == 0) { - return defaults; - } else { - String[] patterns = new String[excludes.length + defaults.length]; - System.arraycopy(excludes, 0, patterns, 0, excludes.length); - System.arraycopy(defaults, 0, patterns, excludes.length, defaults.length); - return patterns; - } - } - - /** - * Returns the given array of patterns with path separator normalized to {@code '/'}. - * Null or empty patterns are ignored, and duplications are removed. - * - * @param patterns the patterns to normalize - * @param excludes whether the patterns are exclude patterns - * @return normalized patterns without null, empty or duplicated patterns - */ - private static String[] normalizePatterns(final String[] patterns, final boolean excludes) { - if (patterns == null) { - return new String[0]; - } - // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. - final var normalized = new LinkedHashSet(patterns.length); - for (String pattern : patterns) { - if (pattern != null && !pattern.isEmpty()) { - final boolean useMavenSyntax = pattern.indexOf(':') < 0; - if (useMavenSyntax) { - pattern = pattern.replace(File.separatorChar, '/'); - if (pattern.endsWith("/")) { - pattern += "**"; - } - // Following are okay only when "**" means "0 or more directories". - while (pattern.endsWith("/**/**")) { - pattern = pattern.substring(0, pattern.length() - 3); - } - while (pattern.startsWith("**/**/")) { - pattern = pattern.substring(3); - } - pattern = pattern.replace("/**/**/", "/**/"); - } - normalized.add(pattern); - /* - * If the pattern starts or ends with "**", Java GLOB expects a directory level at - * that location while Maven seems to consider that "**" can mean "no directory". - * Add another pattern for reproducing this effect. - */ - if (useMavenSyntax) { - addPatternsWithOneDirRemoved(normalized, pattern, 0); - } - } - } - return simplify(normalized, excludes); - } - - /** - * Adds all variants of the given pattern with {@code **} removed. - * This is used for simulating the Maven behavior where {@code "**} may match zero directory. - * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory. - * - * @param patterns where to add the derived patterns - * @param pattern the pattern for which to add derived forms - * @param end should be 0 (reserved for recursive invocations of this method) - */ - private static void addPatternsWithOneDirRemoved(final Set patterns, final String pattern, int end) { - final int length = pattern.length(); - int start; - while ((start = pattern.indexOf("**", end)) >= 0) { - end = start + 2; // 2 is the length of "**". - if (end < length) { - if (pattern.charAt(end) != '/') { - continue; - } - if (start == 0) { - end++; // Ommit the leading slash if there is nothing before it. - } - } - if (start > 0) { - if (pattern.charAt(--start) != '/') { - continue; - } - } - String reduced = pattern.substring(0, start) + pattern.substring(end); - patterns.add(reduced); - addPatternsWithOneDirRemoved(patterns, reduced, start); - } - } - - /** - * Applies some heuristic rules for simplifying the set of patterns, - * then returns the patterns as an array. - * - * @param patterns the patterns to simplify and return asarray - * @param excludes whether the patterns are exclude patterns - * @return the set content as an array, after simplification - */ - private static String[] simplify(Set patterns, boolean excludes) { - /* - * If the "**" pattern is present, it makes all other patterns useless. - * In the case of include patterns, an empty set means to include everything. - */ - if (patterns.remove("**")) { - patterns.clear(); - if (excludes) { - patterns.add("**"); - } - } - return patterns.toArray(String[]::new); - } - - /** - * Eventually adds the parent directory of the given patterns, without duplicated values. - * The patterns given to this method should have been normalized. - * - * @param patterns the normalized include or exclude patterns - * @param excludes whether the patterns are exclude patterns - * @return pattens of directories to include or exclude - */ - private static String[] directoryPatterns(final String[] patterns, final boolean excludes) { - // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. - final var directories = new LinkedHashSet(patterns.length); - for (String pattern : patterns) { - int s = pattern.indexOf(':'); - if (s < 0 || pattern.startsWith("glob:")) { - if (excludes) { - if (pattern.endsWith("/**")) { - directories.add(pattern.substring(0, pattern.length() - 3)); - } - } else { - if (pattern.regionMatches(++s, "**/", 0, 3)) { - s = pattern.indexOf('/', s + 3); - if (s < 0) { - return new String[0]; // Pattern is "**", so we need to accept everything. - } - directories.add(pattern.substring(0, s)); - } - } - } - } - return simplify(directories, excludes); - } - - /** - * Creates the path matchers for the given patterns. - * If no syntax is specified, the default is {@code glob}. - */ - private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) { - final var matchers = new PathMatcher[patterns.length]; - for (int i = 0; i < patterns.length; i++) { - String pattern = patterns[i]; - if (pattern.indexOf(':') < 0) { - pattern = "glob:" + pattern; - } - matchers[i] = fs.getPathMatcher(pattern); - } - return matchers; - } - - /** - * {@return whether there is no include or exclude filters}. - * In such case, this {@code Selector} instance should be ignored. - */ - boolean isEmpty() { - return (includes.length | excludes.length) == 0; - } - - /** - * Determines whether a path is selected for deletion. - * This is true if the given file matches an include pattern and no exclude pattern. - * - * @param pathname The pathname to test, must not be {@code null} - * @return {@code true} if the given path is selected for deletion, {@code false} otherwise - */ - @Override - public boolean matches(Path path) { - path = baseDirectory.relativize(path); - return (includes.length == 0 || isMatched(path, includes)) - && (excludes.length == 0 || !isMatched(path, excludes)); - } - - /** - * {@return whether the given file matches according one of the given matchers}. - */ - private static boolean isMatched(Path path, PathMatcher[] matchers) { - for (PathMatcher matcher : matchers) { - if (matcher.matches(path)) { - return true; - } - } - return false; - } - - /** - * Determines whether a directory could contain selected paths. - * - * @param directory the directory pathname to test, must not be {@code null} - * @return {@code true} if the given directory might contain selected paths, {@code false} if the - * directory will definitively not contain selected paths - */ - public boolean couldHoldSelected(Path directory) { - if (baseDirectory.equals(directory)) { - return true; - } - directory = baseDirectory.relativize(directory); - return (dirIncludes.length == 0 || isMatched(directory, dirIncludes)) - && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes)); - } - - /** - * {@return a string representation for logging purposes}. - * This string representation is reported logged when {@link Cleaner} is executed. - */ - @Override - public String toString() { - var buffer = new StringBuilder(); - Fileset.append(buffer, "includes", includePatterns); - Fileset.append(buffer.append(", "), "excludes", excludePatterns); - return buffer.toString(); - } -}