// Source listing: net.sf.saxon.pattern.PatternParser (Saxon-HE, "The XSLT and XQuery Processor"),
// as published on a Maven / Gradle / Ivy artifact page. Page navigation text ("Go to download",
// "Show more of this group", "Show more artifacts with this name", "Show all versions of Saxon-HE",
// "Show documentation") is kept here as a comment so that the file remains valid Java.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.pattern;
import net.sf.saxon.expr.*;
import net.sf.saxon.expr.instruct.Choose;
import net.sf.saxon.expr.parser.*;
import net.sf.saxon.functions.Doc;
import net.sf.saxon.functions.KeyFn;
import net.sf.saxon.functions.Root_1;
import net.sf.saxon.functions.SuperId;
import net.sf.saxon.lib.Feature;
import net.sf.saxon.om.AxisInfo;
import net.sf.saxon.om.NamespaceUri;
import net.sf.saxon.om.QNameParser;
import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.*;
import net.sf.saxon.value.BooleanValue;
import net.sf.saxon.value.Int64Value;
import net.sf.saxon.value.SequenceType;
/**
 * Parser for XSLT patterns. This is created by overriding selected parts of the standard
 * {@link XPathParser}.
 */
public class PatternParser extends XPathParser {
int inPredicate = 0;
/**
 * Parse a string representing an XSLT pattern.
 *
 * <p>After initialising the parser state from the static context, the pattern text is
 * classified and handed to one of three private helpers:</p>
 * <ul>
 *     <li>text that (after trimming and skipping a leading comment) starts with {@code "."}
 *     is parsed as a predicate pattern;</li>
 *     <li>text recognised by {@code isTypePattern} is parsed as an item type followed by
 *     optional predicates;</li>
 *     <li>anything else is parsed as an expression and converted to a pattern.</li>
 * </ul>
 *
 * @param pattern the pattern expressed as a String
 * @param env     the static context for the pattern
 * @return a Pattern object representing the result of parsing
 * @throws net.sf.saxon.trans.XPathException if the pattern contains a syntax error
 */
/*@NotNull*/
public Pattern parsePattern(String pattern, StaticContext env) throws XPathException {
    this.env = env;
    charChecker = env.getConfiguration().getValidCharacterChecker();
    setLanguage(ParsedLanguage.XSLT_PATTERN, env.getXPathVersion());
    String trimmed = pattern.trim();
    if (trimmed.startsWith("(:")) {
        // Strip off any leading comments so that we can safely detect a SelectionPattern
        t = new Tokenizer();
        t.languageLevel = env.getXPathVersion();
        t.tokenize(trimmed, 0, -1);
        trimmed = trimmed.substring(t.currentTokenStartOffset);
    }
    allowXPath40Syntax = env.getConfiguration().getBooleanProperty(Feature.ALLOW_SYNTAX_EXTENSIONS)
            || env.getXPathVersion() == 40;
    if (isSelectionPattern(trimmed)) {
        return parseLeadingDotPattern(pattern, env);
    } else if (isTypePattern(pattern)) {
        return parseLeadingTypePattern(pattern, env);
    } else {
        return parseExpressionPattern(pattern, env);
    }
}

/**
 * Parse a pattern whose text starts with ".": either "." alone, or "." followed by a
 * sequence of predicates.
 *
 * @param pattern the original (untrimmed) pattern text
 * @param env     the static context for the pattern
 * @return the parsed expression itself if it is already a Pattern; a UniversalPattern for a
 * bare "."; otherwise a BooleanExpressionPattern combining all the predicates
 * @throws XPathException if parsing fails, or if the parsed expression is not "." followed only by predicates
 */
private Pattern parseLeadingDotPattern(String pattern, StaticContext env) throws XPathException {
    Expression e = parse(pattern, 0, Token.EOF, env);
    if (e instanceof Pattern) {
        return (Pattern) e;
    } else if (e instanceof ContextItemExpression) {
        return new UniversalPattern();
    } else if (e instanceof FilterExpression) {
        // The type hierarchy does not change while unwinding the filters, so fetch it once
        TypeHierarchy th = env.getConfiguration().getTypeHierarchy();
        Expression predicate = null;
        // Peel the filter expressions off from the outside in; each newly found (inner)
        // predicate is placed in front of those already collected
        while (e instanceof FilterExpression) {
            Expression filter = ((FilterExpression) e).getActionExpression();
            e = ((FilterExpression) e).getSelectExpression();
            filter = rewritePossiblyNumericPredicate(filter, th, env);
            predicate = (predicate == null) ? filter : new AndExpression(filter, predicate);
        }
        if (e instanceof ContextItemExpression) {
            return new BooleanExpressionPattern(predicate);
        }
    }
    grumble("Pattern starting with '.' must be followed by a sequence of predicates");
    return null;
}

/**
 * Adjust a predicate taken from a "." pattern to allow for the possibility that its value
 * is numeric.
 *
 * @param filter the predicate expression
 * @param th     the type hierarchy used to relate the predicate's item type to the numeric type
 * @param env    the static context, used to supply the retained static context of a generated expression
 * @return the predicate unchanged if its type is disjoint from numeric; {@code filter eq 1} if it
 * is known to be numeric; otherwise a let-expression that makes the decision at run time
 * @throws XPathException if constructing the replacement expression fails
 */
private Expression rewritePossiblyNumericPredicate(Expression filter, TypeHierarchy th, StaticContext env)
        throws XPathException {
    Affinity rel = th.relationship(filter.getItemType(), NumericType.getInstance());
    if (rel == Affinity.DISJOINT) {
        return filter;
    }
    if (rel == Affinity.SAME_TYPE || rel == Affinity.SUBSUMED_BY) {
        // the predicate IS numeric: rewrite as N eq 1, since other values don't match
        return new ValueComparison(filter, Token.FEQ, Literal.makeLiteral(Int64Value.PLUS_ONE));
    }
    // the predicate MIGHT BE numeric: rewrite as
    // let $P := predicate return if ($P instance of xs:numeric) then ($P eq 1) else $P
    LetExpression let = new LetExpression();
    StructuredQName varName =
            new StructuredQName("vv", NamespaceUri.SAXON_GENERATED_VARIABLE, "v" + filter.hashCode());
    let.setVariableQName(varName);
    InstanceOfExpression condition =
            new InstanceOfExpression(new LocalVariableReference(let), SequenceType.SINGLE_NUMERIC);
    LocalVariableReference ref = new LocalVariableReference(let);
    ref.setStaticType(SequenceType.ANY_SEQUENCE, null, 0);
    ValueComparison comparison =
            new ValueComparison(ref, Token.FEQ, Literal.makeLiteral(Int64Value.PLUS_ONE));
    Choose choice = new Choose(new Expression[]{condition, Literal.makeLiteral(BooleanValue.TRUE)},
                               new Expression[]{comparison, new LocalVariableReference(let)});
    let.setSequence(filter);
    let.setAction(choice);
    let.setRequiredType(SequenceType.ANY_SEQUENCE);
    let.setRetainedStaticContext(env.makeRetainedStaticContext());
    return let;
}

/**
 * Parse a pattern that starts with an item type (as recognised by {@code isTypePattern}),
 * optionally followed by predicates in square brackets.
 *
 * @param pattern the original pattern text
 * @param env     the static context for the pattern
 * @return an ItemTypePattern, wrapped in one BasePatternWithPredicate per predicate
 * @throws XPathException if tokenizing or parsing fails, or if anything follows the last predicate
 */
private Pattern parseLeadingTypePattern(String pattern, StaticContext env) throws XPathException {
    this.env = env;
    if (qNameParser == null) {
        qNameParser = new QNameParser(env.getNamespaceResolver());
        if (languageVersion >= 30) {
            qNameParser = qNameParser.withAcceptEQName(true);
        }
    }
    language = ParsedLanguage.XSLT_PATTERN;
    t = new Tokenizer();
    t.languageLevel = env.getXPathVersion();
    allowXPath40Syntax =
            t.allowSaxonExtensions =
                    env.getConfiguration().getBooleanProperty(Feature.ALLOW_SYNTAX_EXTENSIONS) || t.languageLevel == 40;
    try {
        t.tokenize(pattern, 0, -1);
    } catch (XPathException err) {
        // Re-report a tokenizer failure through the parser's own error reporting
        grumble(err.getMessage());
    }
    ItemType req = parseItemType();
    Pattern result = new ItemTypePattern(req);
    while (t.currentToken == Token.LSQB) {
        nextToken();
        Expression predicate = parsePredicate();
        expect(Token.RSQB);
        nextToken();
        result = new BasePatternWithPredicate(result, predicate);
    }
    expect(Token.EOF);
    return result;
}

/**
 * Parse a pattern by parsing its text as an expression and converting the result to a
 * pattern using {@code PatternMaker.fromExpression}.
 *
 * @param pattern the original pattern text
 * @param env     the static context for the pattern
 * @return the resulting pattern, with its original text recorded
 * @throws XPathException if the pattern is invalid
 */
private Pattern parseExpressionPattern(String pattern, StaticContext env) throws XPathException {
    Expression exp = parse(pattern, 0, Token.EOF, env);
    exp.setRetainedStaticContext(env.makeRetainedStaticContext());
    // If we have a union pattern, check that neither operand is a PredicatePattern
    if (exp instanceof VennExpression) {
        checkNoPredicatePattern(((VennExpression) exp).getLhsExpression());
        checkNoPredicatePattern(((VennExpression) exp).getRhsExpression());
    }
    ExpressionVisitor visitor = ExpressionVisitor.make(env);
    visitor.setOptimizeForPatternMatching(true);
    ContextItemStaticInfo cit =
            visitor.getConfiguration().makeContextItemStaticInfo(AnyNodeTest.getInstance(), true);
    Pattern pat;
    try {
        pat = PatternMaker.fromExpression(exp.simplify().typeCheck(visitor, cit), env.getConfiguration(), true);
    } catch (XPathException e) {
        // NOTE(review): a failure here is deliberately discarded and the pattern is built from
        // the simplified expression without type-checking; presumably the error resurfaces
        // later - confirm.
        pat = PatternMaker.fromExpression(exp.simplify(), env.getConfiguration(), true);
    }
    pat.setOriginalText(pattern);
    if (pat instanceof UnionPattern) {
        // Only attribute text to the two branches when the source splits into exactly two parts on "|"
        String[] parts = pattern.split("\\|");
        if (parts.length == 2) {
            ((UnionPattern) pat).p1.setOriginalText(parts[0]);
            ((UnionPattern) pat).p2.setOriginalText(parts[1]);
        }
    }
    if (exp instanceof FilterExpression && ((FilterExpression) exp).getBase() instanceof ContextItemExpression) {
        // A filter on "." reaching this point is accepted only for 4.0 type-pattern syntax
        boolean typePatternSyntax = allowXPath40Syntax
                && (pattern.startsWith("tuple") || pattern.startsWith("map")
                || pattern.startsWith("array") || pattern.startsWith("union"));
        if (!typePatternSyntax) {
            grumble("A predicatePattern can appear only at the outermost level (parentheses not allowed)");
        }
    }
    if (exp instanceof FilterExpression && pat instanceof NodeTestPattern) {
        // the pattern has been simplified but needs to retain a default priority based on its syntactic form (test match-058)
        pat.setPriority(0.5);
    }
    return pat;
}
/**
 * Test whether the pattern text begins with a "." character.
 *
 * @param pattern the pattern text (the caller passes it trimmed, with any leading comment removed)
 * @return true if the first character of the text is '.'
 */
private boolean isSelectionPattern(String pattern) {
    return !pattern.isEmpty() && pattern.charAt(0) == '.';
}
/**
 * Test whether the pattern text starts with one of the keywords {@code type}, {@code record},
 * {@code map}, {@code array}, {@code union} or {@code atomic}, followed by optional whitespace,
 * an opening parenthesis and at least one further character.
 *
 * <p>The match is made in DOTALL mode ({@code (?s)}) so that the text following the opening
 * parenthesis may contain line terminators; without it, {@code .} stops at the first newline and
 * a pattern written across several lines would not be recognised.</p>
 *
 * @param pattern the pattern text
 * @return true if the text has the form of a type pattern
 * @throws XPathException if the text has this form but {@code checkLanguageVersion40()} rejects it
 */
private boolean isTypePattern(String pattern) throws XPathException {
    if (pattern.matches("(?s)^(type|record|map|array|union|atomic)\\s*\\(.+")) {
        checkLanguageVersion40();
        return true;
    }
    return false;
}
/**
 * Check that an operand of a union pattern is not a predicate pattern, that is, that it is
 * not based on the context item expression. Filter expressions are checked via their base,
 * and nested union-style (Venn) expressions via both of their operands.
 *
 * @param exp the expression to be checked
 * @throws XPathException if a context item expression is found
 */
private void checkNoPredicatePattern(Expression exp) throws XPathException {
    if (exp instanceof ContextItemExpression) {
        grumble("A predicatePattern can appear only at the outermost level (union operator not allowed)");
    }
    if (exp instanceof FilterExpression) {
        final FilterExpression filtered = (FilterExpression) exp;
        checkNoPredicatePattern(filtered.getBase());
    }
    if (exp instanceof VennExpression) {
        final VennExpression venn = (VennExpression) exp;
        checkNoPredicatePattern(venn.getLhsExpression());
        checkNoPredicatePattern(venn.getRhsExpression());
    }
}
/**
 * Callback allowing the tokenizer to be tailored. This implementation leaves the
 * tokenizer exactly as supplied.
 *
 * @param t the tokenizer offered for customization
 */
@Override
protected void customizeTokenizer(Tokenizer t) {
    // intentionally empty: no customization is applied
}
/**
 * Override the parsing of top-level expressions.
 *
 * <p>Inside a predicate the superclass parser is used unchanged. Otherwise, when 4.0 syntax is
 * allowed and the current token is a node-kind token whose value is {@code record}, {@code type},
 * {@code map}, {@code array} or {@code atomic}, an item-type pattern followed by optional
 * predicates is parsed. In every other case a path expression is parsed and passed to
 * {@code parseBinaryExpression}.</p>
 *
 * @return the parsed expression
 * @throws net.sf.saxon.trans.XPathException if the pattern is invalid
 */
/*@NotNull*/
@Override
public Expression parseExpression() throws XPathException {
    final Tokenizer tokenizer = getTokenizer();
    if (inPredicate > 0) {
        return super.parseExpression();
    }
    if (allowXPath40Syntax && tokenizer.currentToken == Token.NODEKIND) {
        switch (tokenizer.currentTokenValue) {
            case "record":
            case "type":
            case "map":
            case "array": {
                ItemType itemType = parseItemType();
                Expression typePattern = new ItemTypePattern(itemType);
                typePattern.setRetainedStaticContext(env.makeRetainedStaticContext());
                setLocation(typePattern);
                while (tokenizer.currentToken == Token.LSQB) {
                    typePattern = parsePredicate(typePattern).toPattern(env.getConfiguration());
                }
                return typePattern;
            }
            case "atomic": {
                // Expect: atomic ( name )
                nextToken();
                expect(Token.NAME);
                StructuredQName atomicName =
                        makeStructuredQName(tokenizer.currentTokenValue, env.getDefaultElementNamespace());
                nextToken();
                expect(Token.RPAR);
                nextToken();
                SchemaType schemaType = env.getConfiguration().getSchemaType(atomicName);
                if (schemaType == null || !schemaType.isAtomicType()) {
                    grumble("Unknown atomic type " + atomicName);
                }
                AtomicType atomicType = (AtomicType) schemaType;
                Expression atomicPattern = new ItemTypePattern(atomicType);
                setLocation(atomicPattern);
                while (tokenizer.currentToken == Token.LSQB) {
                    atomicPattern = parsePredicate(atomicPattern);
                }
                return atomicPattern;
            }
            default:
                break;
        }
    }
    return parseBinaryExpression(parsePathExpression(), 10);
}
/**
 * Parse a basic step expression (without the predicates).
 *
 * <p>Outside a predicate, this rejects tokens that cannot start a step in a pattern, restricts
 * variable references and function calls to the first step, and routes the node-kind keywords
 * {@code type}, {@code tuple}, {@code union}, {@code map}, {@code array} and {@code atomic} to
 * the parser extension. Everything else is delegated to the superclass.</p>
 *
 * @param firstInPattern true only if we are parsing the first step in a
 *                       RelativePathPattern in the XSLT Pattern syntax
 * @return the resulting subexpression
 * @throws net.sf.saxon.trans.XPathException if any error is encountered
 */
/*@NotNull*/
@Override
protected Expression parseBasicStep(boolean firstInPattern) throws XPathException {
    if (inPredicate <= 0) {
        switch (t.currentToken) {
            case Token.DOLLAR:
                if (!firstInPattern) {
                    grumble("In an XSLT 3.0 pattern, a variable reference is allowed only as the first step in a path");
                    return null;
                }
                break;

            case Token.STRING_LITERAL:
            case Token.NUMBER:
            case Token.KEYWORD_CURLY:
            case Token.ELEMENT_QNAME:
            case Token.ATTRIBUTE_QNAME:
            case Token.NAMESPACE_QNAME:
            case Token.PI_QNAME:
            case Token.TAG:
            case Token.NAMED_FUNCTION_REF:
            case Token.DOTDOT:
                grumble("Token " + currentTokenDisplay() + " not allowed here in an XSLT pattern");
                return null;

            case Token.FUNCTION:
                if (!firstInPattern) {
                    grumble("In an XSLT pattern, a function call is allowed only as the first step in a path");
                }
                break;

            case Token.NODEKIND:
                switch (t.currentTokenValue) {
                    case "type":
                    case "tuple":
                    case "union":
                    case "map":
                    case "array":
                    case "atomic":
                        return parserExtension.parseTypePattern(this);
                    default:
                        break;
                }
                break;

            default:
                break;
        }
    }
    // Inside a predicate, or for any step not dealt with above: standard parsing
    return super.parseBasicStep(firstInPattern);
}
/**
 * Test whether an axis is permitted at the current position. The superclass check is always
 * applied first; in addition, outside a predicate, any axis not flagged in
 * {@code AxisInfo.isSubtreeAxis} is rejected.
 *
 * @param axis      the axis code, used as an index into the {@code AxisInfo} tables
 * @param errorCode the error code, passed through to the superclass check
 * @throws XPathException if the axis is not permitted
 */
@Override
protected void testPermittedAxis(int axis, String errorCode) throws XPathException {
    super.testPermittedAxis(axis, errorCode);
    if (inPredicate == 0 && !AxisInfo.isSubtreeAxis[axis]) {
        // The message names the axis explicitly, e.g. "The parent axis is not allowed in a pattern"
        grumble("The " + AxisInfo.axisName[axis] + " axis is not allowed in a pattern");
    }
}
/**
 * Parse an expression appearing within a predicate. This enables full XPath parsing, without
 * the normal rules that apply within an XSLT pattern.
 *
 * <p>While the predicate is being parsed, the {@code inPredicate} depth counter is raised and
 * the tokenizer's {@code disallowUnionKeyword} flag is cleared. Both are restored in a
 * {@code finally} block, so the parser state stays consistent even if parsing the predicate
 * fails with an exception.</p>
 *
 * @return the parsed expression that appears within the predicate
 * @throws net.sf.saxon.trans.XPathException if the predicate is invalid
 */
/*@NotNull*/
@Override
protected Expression parsePredicate() throws XPathException {
    final boolean disallow = t.disallowUnionKeyword;
    t.disallowUnionKeyword = false;
    ++inPredicate;
    try {
        return parseExpression();
    } finally {
        --inPredicate;
        t.disallowUnionKeyword = disallow;
    }
}
/**
 * Parse a function call appearing within a pattern. The call is parsed by the superclass;
 * then, unless we are within a predicate, it is rejected if it is not a call on one of the
 * function classes {@code SuperId}, {@code KeyFn}, {@code Doc} or {@code Root_1}.
 *
 * @param prefixArgument left hand operand of arrow operator,
 *                       or null in the case of a conventional function call
 * @return the expression that results from the parsing (usually a FunctionCall)
 * @throws net.sf.saxon.trans.XPathException if the function call is invalid
 */
/*@NotNull*/
@Override
public Expression parseFunctionCall(Expression prefixArgument) throws XPathException {
    final Expression call = super.parseFunctionCall(prefixArgument);
    if (inPredicate > 0) {
        // No restrictions apply inside a predicate
        return call;
    }
    final boolean permitted = call.isCallOn(SuperId.class)
            || call.isCallOn(KeyFn.class)
            || call.isCallOn(Doc.class)
            || call.isCallOn(Root_1.class);
    if (!permitted) {
        grumble("The " + call + " function is not allowed at the head of a pattern");
    }
    return call;
}
/**
 * Parse an argument of a function call. Within a predicate any argument accepted by the
 * superclass is allowed; otherwise the argument must be a variable reference, a string
 * literal or a numeric literal.
 *
 * @return the parsed argument expression
 * @throws XPathException if the argument is not permitted or cannot be parsed
 */
@Override
public Expression parseFunctionArgument() throws XPathException {
    if (inPredicate > 0) {
        return super.parseFunctionArgument();
    }
    if (t.currentToken == Token.DOLLAR) {
        return parseVariableReference();
    }
    if (t.currentToken == Token.STRING_LITERAL) {
        return parseStringLiteral(true);
    }
    if (t.currentToken == Token.NUMBER) {
        return parseNumericLiteral(true);
    }
    grumble("A function argument in an XSLT pattern must be a variable reference or literal");
    return null;
}
/**
 * Tracing hook. Tracing of pattern evaluation is suppressed, so the supplied expression
 * is handed back unchanged.
 *
 * @param exp   the expression for which a tracer was requested
 * @param qName a name associated with the request; not used by this implementation
 * @return the supplied expression, unmodified
 */
@Override
public Expression makeTracer(Expression exp, StructuredQName qName) {
    // Suppress tracing of pattern evaluation: no wrapper is added
    return exp;
}
}