All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.modeshape.jcr.sequencer.PathExpression Maven / Gradle / Ivy

There is a newer version: 5.4.1.Final
Show newest version
/*
 * ModeShape (http://www.modeshape.org)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of 
 * individual contributors. 
 *
 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * ModeShape is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.modeshape.jcr.sequencer;

import java.io.Serializable;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.modeshape.common.annotation.Immutable;
import org.modeshape.common.util.CheckArg;
import org.modeshape.common.util.HashCode;
import org.modeshape.common.util.ObjectUtil;
import org.modeshape.jcr.GraphI18n;

/**
 * An expression that defines an acceptable path using a regular-expression-like language. Path expressions can be used to
 * represent node paths or properties.
 * 

 * Let's first look at some simple examples of path expressions:

 * <table>
 * <tr><th>Path expression</th><th>Description</th></tr>
 * <tr><td><code>/a/b</code></td><td>Match node "b" that is a child of the top level node "a". Neither node may have
 * any same-name-siblings.</td></tr>
 * <tr><td><code>/a/*</code></td><td>Match any child node of the top level node "a".</td></tr>
 * <tr><td><code>/a/*.txt</code></td><td>Match any child node of the top level node "a" that also has a name ending in
 * ".txt".</td></tr>
 * <tr><td><code>/a/b@c</code></td><td>Match the property "c" of node "/a/b".</td></tr>
 * <tr><td><code>/a/b[2]</code></td><td>The second child named "b" below the top level node "a".</td></tr>
 * <tr><td><code>/a/b[2,3,4]</code></td><td>The second, third or fourth child named "b" below the top level node
 * "a".</td></tr>
 * <tr><td><code>/a/b[*]</code></td><td>Any (and every) child named "b" below the top level node "a".</td></tr>
 * <tr><td><code>//a/b</code></td><td>Any node named "b" that exists below a node named "a", regardless of where node
 * "a" occurs. Again, neither node may have any same-name-siblings.</td></tr>
 * </table>
*

 * With these simple examples, you can probably discern the most important rules. First, the '*' is a wildcard
 * character that matches any character or sequence of characters in a node's name (or index if appearing in between
 * square brackets), and can be used in conjunction with other characters (e.g., "*.txt").

*

 * Second, square brackets (i.e., '[' and ']') are used to match a node's same-name-sibling index. You can put a
 * single non-negative number or a comma-separated list of non-negative numbers. Use '0' to match a node that has no
 * same-name-siblings, or any positive number to match the specific same-name-sibling.

*

 * Third, combining two delimiters (e.g., "//") matches any sequence of nodes, regardless of what their names are or
 * how many nodes there are. This is often used with other patterns to identify nodes at any level matching other
 * patterns. Three or more sequential slash characters are treated as two.

*

 * Many path expressions can be created using just these simple rules. However, input paths can be more complicated.
 * Here are some more examples:

 * <table>
 * <tr><th>Path expression</th><th>Description</th></tr>
 * <tr><td><code>/a/(b|c|d)</code></td><td>Match children of the top level node "a" that are named "b", "c" or "d".
 * None of the nodes may have same-name-sibling indexes.</td></tr>
 * <tr><td><code>/a/b[c/d]</code></td><td>Match node "b" child of the top level node "a", when node "b" has a child
 * named "c", and "c" has a child named "d". Node "b" is the selected node, while nodes "c" and "d" are used as
 * criteria but are not selected.</td></tr>
 * <tr><td><code>/a(/(b|c|d|)/e)[f/g/@something]</code></td><td>Match node "/a/b/e", "/a/c/e", "/a/d/e", or "/a/e"
 * when they also have a child "f" that itself has a child "g" with property "something". None of the nodes may have
 * same-name-sibling indexes.</td></tr>
 * </table>
*

 * These examples show a few more advanced rules. Parentheses (i.e., '(' and ')') can be used to define a set of
 * options for names, as shown in the first and third rules. Whatever part of the selected node's path appears between
 * the parentheses is captured for use within the output path. Thus, the first input path in the previous table would
 * match node "/a/b", and "b" would be captured and could be used within the output path using "$1", where the number
 * used in the output path identifies the parentheses.

*

 * Square brackets can also be used to specify criteria on a node's properties or children. Whatever appears in
 * between the square brackets does not appear in the selected node.

*

Workspace names

*

 * Path expressions can also specify restrictions on the workspace name to constrain the path expression to matching
 * only paths from certain workspaces meeting the name criteria. Of course, if the path expression doesn't include
 * these restrictions, the workspace names are not considered when matching paths.

*/ @Immutable public class PathExpression implements Serializable { /** * Initial version */ private static final long serialVersionUID = 1L; /** * Compile the supplied expression and return the resulting path expression instance. * * @param expression the expression * @return the path expression; never null * @throws IllegalArgumentException if the expression is null * @throws InvalidPathExpressionException if the expression is blank or is not a valid expression */ public static final PathExpression compile( String expression ) throws InvalidPathExpressionException { return new PathExpression(expression); } private static final String SEQUENCE_PATTERN_STRING = "\\[(\\d+(?:,\\d+)*)\\]"; // \[(\d+(,\d+)*)\] private static final Pattern SEQUENCE_PATTERN = Pattern.compile(SEQUENCE_PATTERN_STRING); /** * Regular expression used to find unusable XPath predicates within an expression. This pattern results in unusable predicates * in group 1. Note that some predicates may be valid at the end but not valid elsewhere. *

* Currently, only index-like predicates (including sequences) are allowed everywhere. Predicates with paths and properties * are allowed only as the last predicate. Predicates with any operators are unused. *

*

* Nested predicates are not currently allowed. *

*/ // \[(?:(?:\d+(?:,\d+)*)|\*)\]|(?:\[[^\]\+\-\*=\!><'"\s]+\])$|(\[[^\]]+\]) private static final String UNUSABLE_PREDICATE_PATTERN_STRING = "\\[(?:(?:\\d+(?:,\\d+)*)|\\*)\\]|(?:\\[[^\\]\\+\\-\\*=\\!><'\"\\s]+\\])$|(\\[[^\\]]+\\])"; private static final Pattern UNUSABLE_PREDICATE_PATTERN = Pattern.compile(UNUSABLE_PREDICATE_PATTERN_STRING); /** * Regular expression used to find all XPath predicates except index and sequence patterns. This pattern results in the * predicates to be removed in group 1. */ // \[(?:(?:\d+(?:,\d+)*)|\*)\]|(\[[^\]]+\]) private static final String NON_INDEX_PREDICATE_PATTERN_STRING = "\\[(?:(?:\\d+(?:,\\d+)*)|\\*)\\]|(\\[[^\\]]+\\])"; private static final Pattern NON_INDEX_PREDICATE_PATTERN = Pattern.compile(NON_INDEX_PREDICATE_PATTERN_STRING); /** * The regular expression that is used to extract the workspace name and path from an path expression (or a real path). The * regular expression is (([^:/]*):)?(.*). Group 2 will contain the workspace name and group 3 the path. */ private static final String WORKSPACE_AND_PATH_PATTERN_STRING = "(([^:/]*):)?(.*)"; private static final Pattern WORKSPACE_AND_PATH_PATTERN = Pattern.compile(WORKSPACE_AND_PATH_PATTERN_STRING); private final String expression; /** * This is the pattern that is used to determine if the particular path is from a particular workspace. This pattern will be * null if the expression does not constrain the workspace name. */ private final Pattern workspacePattern; /** * This is the pattern that is used to determine if there is a match with particular paths. */ private final Pattern matchPattern; /** * This is the pattern that is used to determine which parts of the particular input paths are included in the * {@link Matcher#getSelectedNodePath() selected path}, only after the input path has already matched. */ private final Pattern selectPattern; /** * Create the supplied expression. 
* * @param expression the expression * @throws IllegalArgumentException if the expression is null * @throws InvalidPathExpressionException if the expression is blank or is not a valid expression */ public PathExpression( String expression ) throws InvalidPathExpressionException { CheckArg.isNotNull(expression, "path expression"); this.expression = expression.trim(); if (this.expression.length() == 0) { throw new InvalidPathExpressionException(GraphI18n.pathExpressionMayNotBeBlank.text()); } // Separate out the repository name, workspace name, and path fragments into separate match patterns ... WorkspacePath repoPath = parsePathInWorkspace(this.expression); if (repoPath == null) { throw new InvalidPathExpressionException(GraphI18n.pathExpressionHasInvalidMatch.text(this.expression, this.expression)); } String workPatternStr = repoPath.workspaceName != null ? repoPath.workspaceName : ".*"; String pathPatternStr = repoPath.path; this.workspacePattern = Pattern.compile(workPatternStr); // Build the repository match pattern ... // Build the match pattern, which determines whether a path matches the condition ... String matchString = pathPatternStr; try { matchString = removeUnusedPredicates(matchString); matchString = replaceXPathPatterns(matchString); this.matchPattern = Pattern.compile(matchString, Pattern.CASE_INSENSITIVE); } catch (PatternSyntaxException e) { String msg = GraphI18n.pathExpressionHasInvalidMatch.text(matchString, this.expression); throw new InvalidPathExpressionException(msg, e); } // Build the select pattern, which determines the path that will be selected ... String selectString = pathPatternStr; try { selectString = removeAllPredicatesExceptIndexes(selectString); selectString = replaceXPathPatterns(selectString); selectString = "(" + selectString + ").*"; // group 1 will have selected path ... 
this.selectPattern = Pattern.compile(selectString, Pattern.CASE_INSENSITIVE); } catch (PatternSyntaxException e) { String msg = GraphI18n.pathExpressionHasInvalidSelect.text(selectString, this.expression); throw new InvalidPathExpressionException(msg, e); } } /** * @return expression */ public String getExpression() { return expression; } /** * Replace certain XPath patterns that are not used or understood. * * @param expression the input regular expressions string; may not be null * @return the regular expression with all unused XPath patterns removed; never null */ protected String removeUnusedPredicates( String expression ) { assert expression != null; java.util.regex.Matcher matcher = UNUSABLE_PREDICATE_PATTERN.matcher(expression); StringBuffer sb = new StringBuffer(); if (matcher.find()) { do { // Remove those predicates that show up in group 1 ... String predicateStr = matcher.group(0); String unusablePredicateStr = matcher.group(1); if (unusablePredicateStr != null) { predicateStr = ""; } matcher.appendReplacement(sb, predicateStr); } while (matcher.find()); matcher.appendTail(sb); expression = sb.toString(); } return expression; } /** * Remove all XPath predicates from the supplied regular expression string. * * @param expression the input regular expressions string; may not be null * @return the regular expression with all XPath predicates removed; never null */ protected String removeAllPredicatesExceptIndexes( String expression ) { assert expression != null; java.util.regex.Matcher matcher = NON_INDEX_PREDICATE_PATTERN.matcher(expression); StringBuffer sb = new StringBuffer(); if (matcher.find()) { do { // Remove those predicates that show up in group 1 ... 
String predicateStr = matcher.group(0); String unusablePredicateStr = matcher.group(1); if (unusablePredicateStr != null) { predicateStr = ""; } matcher.appendReplacement(sb, predicateStr); } while (matcher.find()); matcher.appendTail(sb); expression = sb.toString(); } return expression; } /** * Replace certain XPath patterns, including some predicates, with substrings that are compatible with regular expressions. * * @param expression the input regular expressions string; may not be null * @return the regular expression with XPath patterns replaced with regular expression fragments; never null */ protected String replaceXPathPatterns( String expression ) { assert expression != null; // replace 2 or more sequential '|' characters in an OR expression expression = expression.replaceAll("[\\|]{2,}", "|"); // if there is an empty expression in an OR expression, make the whole segment optional ... // (e.g., "/a/b/(c|)/d" => "a/b(/(c))?/d" expression = expression.replaceAll("/(\\([^|]+)(\\|){2,}([^)]+\\))", "(/$1$2$3)?"); expression = expression.replaceAll("/\\(\\|+([^)]+)\\)", "(?:/($1))?"); expression = expression.replaceAll("/\\((([^|]+)(\\|[^|]+)*)\\|+\\)", "(?:/($1))?"); // // Allow any path (that doesn't contain an explicit counter) to contain a counter, // // done by replacing any '/' or '|' that isn't preceded by ']' or '*' or '/' or '(' with '(\[\d+\])?/'... // input = input.replaceAll("(?<=[^\\]\\*/(])([/|])", "(?:\\\\[\\\\d+\\\\])?$1"); // Does the path contain any '[]' or '[*]' or '[0]' or '[n]' (where n is any positive integers)... 
// '[*]/' => '(\[\d+\])?/' expression = expression.replaceAll("\\[\\]", "(?:\\\\[\\\\d+\\\\])?"); // index is optional // '[]/' => '(\[\d+\])?/' expression = expression.replaceAll("\\[[*]\\]", "(?:\\\\[\\\\d+\\\\])?"); // index is optional // '[0]/' => '(\[0\])?/' expression = expression.replaceAll("\\[0\\]", "(?:\\\\[0\\\\])?"); // index is optional // '[n]/' => '\[n\]/' expression = expression.replaceAll("\\[([1-9]\\d*)\\]", "\\\\[$1\\\\]"); // index is required // Change any other end predicates to not be wrapped by braces but to begin with a slash ... // ...'[x]' => ...'/x' expression = expression.replaceAll("(?/
"), any sequence of nodes ("//"), the self reference ("."), or wildcard (" * *", "*[]" or "*[*]"). Combinations of these individual expressions are also * considered to match anything. * * @return true if the expression matches anything, or false otherwise */ public boolean matchesAnything() { return ANYTHING_PATTERN.matcher(expression).matches(); } public static PathExpression all() { return ALL_PATHS_EXPRESSION; } private static final PathExpression ALL_PATHS_EXPRESSION = PathExpression.compile("//"); /** * Parse a path of the form {workspaceName}:{absolutePath} or {absolutePath}. * * @param path the path * @return the workspace path, or null if the supplied path doesn't match any of the path patterns */ public static WorkspacePath parsePathInWorkspace( String path ) { // Extract the workspace name and absPath from the supplied path ... java.util.regex.Matcher pathMatcher = WORKSPACE_AND_PATH_PATTERN.matcher(path); if (!pathMatcher.matches()) { // No match ... return null; } String workspaceName = pathMatcher.group(2); String absolutePath = pathMatcher.group(3); if (workspaceName == null || workspaceName.length() == 0 || workspaceName.trim().length() == 0) workspaceName = null; return new WorkspacePath(workspaceName, absolutePath); } @Immutable public static class WorkspacePath { public final String workspaceName; public final String path; public WorkspacePath( String workspaceName, String path ) { this.workspaceName = workspaceName; this.path = path; } /** * {@inheritDoc} * * @see java.lang.Object#hashCode() */ @Override public int hashCode() { return path.hashCode(); } /** * {@inheritDoc} * * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals( Object obj ) { if (obj == this) return true; if (obj instanceof WorkspacePath) { WorkspacePath that = (WorkspacePath)obj; if (!ObjectUtil.isEqualWithNulls(this.workspaceName, that.workspaceName)) return false; return this.path.equals(that.path); } return false; } /** * {@inheritDoc} * * @see 
java.lang.Object#toString() */ @Override public String toString() { return (workspaceName != null ? workspaceName : "") + ":" + path; } public WorkspacePath withWorkspaceName( String workspaceName ) { return new WorkspacePath(workspaceName, path); } public WorkspacePath withPath( String path ) { return new WorkspacePath(workspaceName, path); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy