net.sf.saxon.expr.VennExpression Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.expr;
import net.sf.saxon.Configuration;
import net.sf.saxon.expr.instruct.Block;
import net.sf.saxon.expr.parser.*;
import net.sf.saxon.expr.sort.DocumentSorter;
import net.sf.saxon.expr.sort.GlobalOrderComparer;
import net.sf.saxon.functions.SystemFunction;
import net.sf.saxon.om.AxisInfo;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.pattern.*;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.ItemType;
import net.sf.saxon.type.Type;
import net.sf.saxon.type.TypeHierarchy;
import net.sf.saxon.value.Cardinality;
import net.sf.saxon.value.SequenceType;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* An expression representing a nodeset that is a union, difference, or
* intersection of two other NodeSets
*/
public class VennExpression extends BinaryExpression {
/**
* Constructor
*
* @param p1 the left-hand operand
* @param op the operator (union, intersection, or difference)
* @param p2 the right-hand operand
*/
public VennExpression(final Expression p1, final int op, final Expression p2) {
super(p1, op, p2);
}
/**
* Simplify an expression. This performs any static optimization (by rewriting the expression
* as a different expression). The default implementation simplifies its operands.
*
* @return the simplified expression (or the original if unchanged, or if modified in-situ)
* @throws net.sf.saxon.trans.XPathException if an error is discovered during expression
* rewriting
*/
@Override
public Expression simplify() throws XPathException {
// Force both operands to be sorted in document order. If this turns out to be unnecessary, it will
// get optimized away
if (!(getLhsExpression() instanceof DocumentSorter)) {
setLhsExpression(new DocumentSorter(getLhsExpression()));
}
if (!(getRhsExpression() instanceof DocumentSorter)) {
setRhsExpression(new DocumentSorter(getRhsExpression()));
}
super.simplify();
return this;
}
/**
* Get a name identifying the kind of expression, in terms meaningful to a user.
*
* @return a name identifying the kind of expression, in terms meaningful to a user.
* The name will always be in the form of a lexical XML QName, and should match the name used
* in explain() output displaying the expression.
*/
public String getExpressionName() {
switch (operator) {
case Token.UNION:
return "union";
case Token.INTERSECT:
return "intersect";
case Token.EXCEPT:
return "except";
default:
return "unknown";
}
}
/**
* Determine the data type of the items returned by this expression
*
* @return the data type
*/
/*@NotNull*/
public final ItemType getItemType() {
final ItemType t1 = getLhsExpression().getItemType();
if (operator == Token.UNION) {
ItemType t2 = getRhsExpression().getItemType();
TypeHierarchy th = getConfiguration().getTypeHierarchy();
return Type.getCommonSuperType(t1, t2, th);
} else {
return t1;
}
}
/**
* Determine the static cardinality of the expression
*/
public final int computeCardinality() {
final int c1 = getLhsExpression().getCardinality();
final int c2 = getRhsExpression().getCardinality();
switch (operator) {
case Token.UNION:
if (Literal.isEmptySequence(getLhsExpression())) {
return c2;
}
if (Literal.isEmptySequence(getRhsExpression())) {
return c1;
}
return c1 | c2 | StaticProperty.ALLOWS_ONE | StaticProperty.ALLOWS_MANY;
// allows ZERO only if one operand allows ZERO
case Token.INTERSECT:
if (Literal.isEmptySequence(getLhsExpression())) {
return StaticProperty.EMPTY;
}
if (Literal.isEmptySequence(getRhsExpression())) {
return StaticProperty.EMPTY;
}
return (c1 & c2) | StaticProperty.ALLOWS_ZERO | StaticProperty.ALLOWS_ONE;
// allows MANY only if both operands allow MANY
case Token.EXCEPT:
if (Literal.isEmptySequence(getLhsExpression())) {
return StaticProperty.EMPTY;
}
if (Literal.isEmptySequence(getRhsExpression())) {
return c1;
}
return c1 | StaticProperty.ALLOWS_ZERO | StaticProperty.ALLOWS_ONE;
// allows MANY only if first operand allows MANY
}
return StaticProperty.ALLOWS_ZERO_OR_MORE;
}
/**
* Get the static properties of this expression (other than its type). The result is
* bit-signficant. These properties are used for optimizations. In general, if
* property bit is set, it is true, but if it is unset, the value is unknown.
*/
public int computeSpecialProperties() {
final int prop0 = getLhsExpression().getSpecialProperties();
final int prop1 = getRhsExpression().getSpecialProperties();
int props = StaticProperty.ORDERED_NODESET;
if (testContextDocumentNodeSet(prop0, prop1)) {
props |= StaticProperty.CONTEXT_DOCUMENT_NODESET;
}
if (testSubTree(prop0, prop1)) {
props |= StaticProperty.SUBTREE_NODESET;
}
if (!testCreative(prop0, prop1)) {
props |= StaticProperty.NON_CREATIVE;
}
return props;
}
/**
* Determine whether all the nodes in the node-set are guaranteed to
* come from the same document as the context node. Used for optimization.
*
* @param prop0 contains the Context Document Nodeset property of the first operand
* @param prop1 contains the Context Document Nodeset property of the second operand
* @return true if all the nodes come from the context document
*/
private boolean testContextDocumentNodeSet(final int prop0, final int prop1) {
switch (operator) {
case Token.UNION:
return (prop0 & prop1 & StaticProperty.CONTEXT_DOCUMENT_NODESET) != 0;
case Token.INTERSECT:
return ((prop0 | prop1) & StaticProperty.CONTEXT_DOCUMENT_NODESET) != 0;
case Token.EXCEPT:
return (prop0 & StaticProperty.CONTEXT_DOCUMENT_NODESET) != 0;
}
return false;
}
/**
* Gather the component operands of a union or intersect expression
*
* @param operator union or intersect
* @param set the set into which the components are to be gathered. If the operator
* is union, this follows the tree gathering all operands of union expressions. Ditto,
* mutatis mutandis, for intersect expressions.
*/
public void gatherComponents(int operator, Set set) {
if (getLhsExpression() instanceof VennExpression && ((VennExpression) getLhsExpression()).operator == operator) {
((VennExpression) getLhsExpression()).gatherComponents(operator, set);
} else {
set.add(getLhsExpression());
}
if (getRhsExpression() instanceof VennExpression && ((VennExpression) getRhsExpression()).operator == operator) {
((VennExpression) getRhsExpression()).gatherComponents(operator, set);
} else {
set.add(getRhsExpression());
}
}
/**
* Determine whether all the nodes in the node-set are guaranteed to
* come from a subtree rooted at the context node. Used for optimization.
*
* @param prop0 contains the SubTree property of the first operand
* @param prop1 contains the SubTree property of the second operand
* @return true if all the nodes come from the tree rooted at the context node
*/
private boolean testSubTree(final int prop0, final int prop1) {
switch (operator) {
case Token.UNION:
return (prop0 & prop1 & StaticProperty.SUBTREE_NODESET) != 0;
case Token.INTERSECT:
return ((prop0 | prop1) & StaticProperty.SUBTREE_NODESET) != 0;
case Token.EXCEPT:
return (prop0 & StaticProperty.SUBTREE_NODESET) != 0;
}
return false;
}
/**
* Determine whether the expression can create new nodes
*
* @param prop0 contains the noncreative property of the first operand
* @param prop1 contains the noncreative property of the second operand
* @return true if the expression can create new nodes
*/
private boolean testCreative(final int prop0, final int prop1) {
return !(((prop0 & StaticProperty.NON_CREATIVE) != 0) &&
((prop1 & StaticProperty.NON_CREATIVE) != 0));
}
/**
* Type-check the expression
*/
/*@NotNull*/
public Expression typeCheck(ExpressionVisitor visitor, final ContextItemStaticInfo contextInfo) throws XPathException {
typeCheckChildren(visitor, contextInfo);
if (!(getLhsExpression() instanceof Pattern)) {
final RoleDiagnostic role0 = new RoleDiagnostic(RoleDiagnostic.BINARY_EXPR, Token.tokens[operator], 0);
setLhsExpression(TypeChecker.staticTypeCheck(getLhsExpression(), SequenceType.NODE_SEQUENCE, false, role0, visitor));
}
if (!(getRhsExpression() instanceof Pattern)) {
final RoleDiagnostic role1 = new RoleDiagnostic(RoleDiagnostic.BINARY_EXPR, Token.tokens[operator], 1);
setRhsExpression(TypeChecker.staticTypeCheck(getRhsExpression(), SequenceType.NODE_SEQUENCE, false, role1, visitor));
}
// For the intersect and except operators, if the types are disjoint then we can simplify
if (operator != Token.UNION) {
TypeHierarchy th = getConfiguration().getTypeHierarchy();
ItemType t0 = getLhsExpression().getItemType();
ItemType t1 = getRhsExpression().getItemType();
if (th.relationship(t0, t1) == TypeHierarchy.DISJOINT) {
if (operator == Token.INTERSECT) {
return Literal.makeEmptySequence();
} else {
if ((getLhsExpression().getSpecialProperties() & StaticProperty.ORDERED_NODESET) != 0) {
return getLhsExpression();
} else {
return new DocumentSorter(getLhsExpression());
}
}
}
}
return this;
}
/**
* Perform optimisation of an expression and its subexpressions.
*
* This method is called after all references to functions and variables have been resolved
* to the declaration of the function or variable, and after all type checking has been done.
*
* @param visitor an expression visitor
* @param contextItemType the static type of "." at the point where this expression is invoked.
* The parameter is set to null if it is known statically that the context item will be undefined.
* If the type of the context item is not known statically, the argument is set to
* {@link net.sf.saxon.type.Type#ITEM_TYPE}
* @return the original expression, rewritten if appropriate to optimize execution
* @throws net.sf.saxon.trans.XPathException
* if an error is discovered during this phase
* (typically a type error)
*/
/*@NotNull*/
public Expression optimize(ExpressionVisitor visitor, ContextItemStaticInfo contextItemType) throws XPathException {
Expression e = super.optimize(visitor, contextItemType);
if (e != this) {
return e;
}
final Configuration config = getConfiguration();
final TypeHierarchy th = config.getTypeHierarchy();
// If either operand is an empty sequence, simplify the expression. This can happen
// after reduction with constructs of the form //a[condition] | //b[not(condition)],
// common in XPath 1.0 because there were no conditional expressions.
switch (operator) {
case Token.UNION:
if (Literal.isEmptySequence(getLhsExpression()) &&
(getRhsExpression().getSpecialProperties() & StaticProperty.ORDERED_NODESET) != 0) {
return getRhsExpression();
}
if (Literal.isEmptySequence(getRhsExpression()) &&
(getLhsExpression().getSpecialProperties() & StaticProperty.ORDERED_NODESET) != 0) {
return getLhsExpression();
}
break;
case Token.INTERSECT:
if (Literal.isEmptySequence(getLhsExpression())) {
return getLhsExpression();
}
if (Literal.isEmptySequence(getRhsExpression())) {
return getRhsExpression();
}
break;
case Token.EXCEPT:
if (Literal.isEmptySequence(getLhsExpression())) {
return getLhsExpression();
}
if (Literal.isEmptySequence(getRhsExpression()) &&
(getLhsExpression().getSpecialProperties() & StaticProperty.ORDERED_NODESET) != 0) {
return getLhsExpression();
}
break;
}
// If both are axis expressions on the same axis, merge them
// ie. rewrite (axis::test1 | axis::test2) as axis::(test1 | test2)
if (getLhsExpression() instanceof AxisExpression && getRhsExpression() instanceof AxisExpression) {
final AxisExpression a1 = (AxisExpression) getLhsExpression();
final AxisExpression a2 = (AxisExpression) getRhsExpression();
if (a1.getAxis() == a2.getAxis()) {
AxisExpression ax = new AxisExpression(a1.getAxis(),
new CombinedNodeTest(a1.getNodeTest(),
operator,
a2.getNodeTest()));
ExpressionTool.copyLocationInfo(this, ax);
return ax;
}
}
// If both are path expressions starting the same way, merge them
// i.e. rewrite (/X | /Y) as /(X|Y). This applies recursively, so that
// /A/B/C | /A/B/D becomes /A/B/child::(C|D)
// This optimization was previously done for all three operators. However, it's not safe for "except":
// A//B except A//C//B cannot be rewritten as A/descendant-or-self::node()/(B except C//B). As a quick
// fix, the optimization has been retained for "union" but dropped for "intersect" and "except". Need to
// do a more rigorous analysis of the conditions under which it is safe.
// TODO: generalize this code to handle all distributive operators
if (getLhsExpression() instanceof SlashExpression && getRhsExpression() instanceof SlashExpression && operator == Token.UNION) {
final SlashExpression path1 = (SlashExpression) getLhsExpression();
final SlashExpression path2 = (SlashExpression) getRhsExpression();
if (path1.getFirstStep().equals(path2.getFirstStep())) {
final VennExpression venn = new VennExpression(
path1.getRemainingSteps(),
operator,
path2.getRemainingSteps());
ExpressionTool.copyLocationInfo(this, venn);
final Expression path = ExpressionTool.makePathExpression(path1.getFirstStep(), venn, false);
ExpressionTool.copyLocationInfo(this, path);
return path.optimize(visitor, contextItemType);
}
}
// Try merging two non-positional filter expressions:
// A[exp0] | A[exp1] becomes A[exp0 or exp1]
if (getLhsExpression() instanceof FilterExpression && getRhsExpression() instanceof FilterExpression) {
final FilterExpression exp0 = (FilterExpression) getLhsExpression();
final FilterExpression exp1 = (FilterExpression) getRhsExpression();
if (!exp0.isPositional(th) &&
!exp1.isPositional(th) &&
exp0.getSelectExpression().equals(exp1.getSelectExpression())) {
final Expression filter;
switch (operator) {
case Token.UNION:
filter = new OrExpression(exp0.getFilter(),
exp1.getFilter());
break;
case Token.INTERSECT:
filter = new AndExpression(exp0.getFilter(),
exp1.getFilter());
break;
case Token.EXCEPT:
Expression negate2 = SystemFunction.makeCall("not", getRetainedStaticContext(), exp1.getFilter());
filter = new AndExpression(exp0.getFilter(), negate2);
break;
default:
throw new AssertionError("Unknown operator " + operator);
}
ExpressionTool.copyLocationInfo(this, filter);
FilterExpression f = new FilterExpression(exp0.getSelectExpression(), filter);
ExpressionTool.copyLocationInfo(this, f);
return f.simplify().typeCheck(visitor, contextItemType).optimize(visitor, contextItemType);
}
}
// Convert @*|node() into @*,node() to eliminate the sorted merge operation
// Avoid doing this when streaming because xsl:value-of select="@*,node()" is not currently streamable
if (!visitor.isOptimizeForStreaming() && operator == Token.UNION &&
getLhsExpression() instanceof AxisExpression && getRhsExpression() instanceof AxisExpression) {
AxisExpression a0 = (AxisExpression) getLhsExpression();
AxisExpression a1 = (AxisExpression) getRhsExpression();
if (a0.getAxis() == AxisInfo.ATTRIBUTE && a1.getAxis() == AxisInfo.CHILD) {
return new Block(new Expression[]{getLhsExpression(), getRhsExpression()});
} else if (a1.getAxis() == AxisInfo.ATTRIBUTE && a0.getAxis() == AxisInfo.CHILD) {
return new Block(new Expression[]{getRhsExpression(), getLhsExpression()});
}
}
// Convert (A intersect B) to use a serial search where one operand is a singleton
if (operator == Token.INTERSECT && !Cardinality.allowsMany(getLhsExpression().getCardinality())) {
return new SingletonIntersectExpression(getLhsExpression(), operator, getRhsExpression().unordered(false, false));
}
if (operator == Token.INTERSECT && !Cardinality.allowsMany(getRhsExpression().getCardinality())) {
return new SingletonIntersectExpression(getRhsExpression(), operator, getLhsExpression().unordered(false, false));
}
// If the types of the operands are disjoint, simplify "intersect" and "except"
if (operandsAreDisjoint(th)) {
if (operator == Token.INTERSECT) {
return Literal.makeEmptySequence();
} else if (operator == Token.EXCEPT) {
if ((getLhsExpression().getSpecialProperties() & StaticProperty.ORDERED_NODESET) != 0) {
return getLhsExpression();
} else {
return new DocumentSorter(getLhsExpression());
}
}
}
return this;
}
private boolean operandsAreDisjoint(TypeHierarchy th) {
return th.relationship(getLhsExpression().getItemType(), getRhsExpression().getItemType()) == TypeHierarchy.DISJOINT;
}
/**
* Replace this expression by an expression that returns the same result but without
* regard to order
*
* @param retainAllNodes true if all nodes in the result must be retained; false
* if duplicates can be eliminated
* @param forStreaming set to true if optimizing for streaming
*/
@Override
public Expression unordered(boolean retainAllNodes, boolean forStreaming) throws XPathException {
if (operator == Token.UNION && !forStreaming &&
operandsAreDisjoint(getConfiguration().getTypeHierarchy())) {
// replace union operator by comma operator to avoid cost of sorting into document order. See XMark q7
Block block = new Block(new Expression[]{getLhsExpression(), getRhsExpression()});
ExpressionTool.copyLocationInfo(this, block);
return block;
}
return this;
}
/**
* Copy an expression. This makes a deep copy.
*
* @return the copy of the original expression
* @param rebindings
*/
/*@NotNull*/
public Expression copy(RebindingMap rebindings) {
VennExpression exp = new VennExpression(getLhsExpression().copy(rebindings), operator, getRhsExpression().copy(rebindings));
ExpressionTool.copyLocationInfo(this, exp);
return exp;
}
/**
* An implementation of Expression must provide at least one of the methods evaluateItem(), iterate(), or process().
* This method indicates which of these methods is provided directly. The other methods will always be available
* indirectly, using an implementation that relies on one of the other methods.
*
* @return the implementation method, for example {@link #ITERATE_METHOD} or {@link #EVALUATE_METHOD} or
* {@link #PROCESS_METHOD}
*/
@Override
public int getImplementationMethod() {
return ITERATE_METHOD;
}
/**
* Get the operand role (applies to both operands)
* @return the operand role
* @param arg
*/
protected OperandRole getOperandRole(int arg) {
return OperandRole.SAME_FOCUS_ACTION;
}
/**
* Is this expression the same as another expression?
*/
public boolean equals(Object other) {
// NOTE: it's possible that the method in the superclass is already adequate for this
if (other instanceof VennExpression) {
VennExpression b = (VennExpression) other;
if (operator != b.operator) {
return false;
}
if (getLhsExpression().equals(b.getLhsExpression()) && getRhsExpression().equals(b.getRhsExpression())) {
return true;
}
if (operator == Token.UNION || operator == Token.INTERSECT) {
// These are commutative and associative, so for example (A|B)|C equals B|(A|C)
Set s0 = new HashSet(10);
gatherComponents(operator, s0);
Set s1 = new HashSet(10);
((VennExpression) other).gatherComponents(operator, s1);
return s0.equals(s1);
}
}
return false;
}
public int hashCode() {
return getLhsExpression().hashCode() ^ getRhsExpression().hashCode();
}
/**
* Convert this expression to an equivalent XSLT pattern
*
* @param config the Saxon configuration
* @param is30 true if this is XSLT 3.0
* @return the equivalent pattern
* @throws net.sf.saxon.trans.XPathException
* if conversion is not possible
*/
@Override
public Pattern toPattern(Configuration config, boolean is30) throws XPathException {
if (isPredicatePattern(getLhsExpression()) || isPredicatePattern(getRhsExpression())) {
throw new XPathException(
"Cannot use a predicate pattern as an operand of a union, intersect, or except operator",
"XTSE0340");
}
if (operator == Token.UNION) {
return new UnionPattern(
getLhsExpression().toPattern(config, is30),
getRhsExpression().toPattern(config, is30));
} else if (is30) {
if (operator == Token.EXCEPT) {
return new ExceptPattern(
getLhsExpression().toPattern(config, is30),
getRhsExpression().toPattern(config, is30));
} else {
return new IntersectPattern(
getLhsExpression().toPattern(config, is30),
getRhsExpression().toPattern(config, is30));
}
} else {
throw new XPathException("Cannot use intersect or except in an XSLT 2.0 pattern", "XTSE0340");
}
}
private boolean isPredicatePattern(Expression exp) {
if (exp instanceof ItemChecker) {
exp = ((ItemChecker)exp).getBaseExpression();
}
return exp instanceof FilterExpression && (((FilterExpression)exp).getSelectExpression() instanceof ContextItemExpression);
}
/**
* Get the element name used to identify this expression in exported expression format
*
* @return the element name used to identify this expression
*/
@Override
protected String tag() {
if (operator == Token.UNION) {
return "union";
}
return Token.tokens[operator];
}
/**
* Iterate over the value of the expression. The result will always be sorted in document order,
* with duplicates eliminated
*
* @param c The context for evaluation
* @return a SequenceIterator representing the union of the two operands
*/
/*@NotNull*/
public SequenceIterator iterate(final XPathContext c) throws XPathException {
SequenceIterator i1 = getLhsExpression().iterate(c);
SequenceIterator i2 = getRhsExpression().iterate(c);
switch (operator) {
case Token.UNION:
return new UnionEnumeration(i1, i2,
GlobalOrderComparer.getInstance());
case Token.INTERSECT:
return new IntersectionEnumeration(i1, i2,
GlobalOrderComparer.getInstance());
case Token.EXCEPT:
return new DifferenceEnumeration(i1, i2,
GlobalOrderComparer.getInstance());
}
throw new UnsupportedOperationException("Unknown operator in Venn Expression");
}
/**
* Get the effective boolean value. In the case of a union expression, this
* is reduced to an OR expression, for efficiency
*/
public boolean effectiveBooleanValue(final XPathContext context) throws XPathException {
if (operator == Token.UNION) {
// NOTE: this optimization was probably already done statically
return getLhsExpression().effectiveBooleanValue(context) || getRhsExpression().effectiveBooleanValue(context);
} else {
return super.effectiveBooleanValue(context);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy