All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.expr.parser.PathMap Maven / Gradle / Ivy

There is a newer version: 10.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2013 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.expr.parser;

import net.sf.saxon.Configuration;
import net.sf.saxon.expr.*;
import net.sf.saxon.functions.Doc;
import net.sf.saxon.functions.DocumentFn;
import net.sf.saxon.functions.ResolveURI;
import net.sf.saxon.functions.SystemFunctionCall;
import net.sf.saxon.om.AxisInfo;
import net.sf.saxon.pattern.AnyNodeTest;
import net.sf.saxon.pattern.NodeKindTest;
import net.sf.saxon.pattern.NodeTest;
import net.sf.saxon.query.StaticQueryContext;
import net.sf.saxon.query.XQueryExpression;
import net.sf.saxon.sxpath.XPathEvaluator;
import net.sf.saxon.sxpath.XPathExpression;
import net.sf.saxon.trans.XPathException;

import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;

/**
 * A PathMap is a description of all the paths followed by an expression.
 * It is a set of trees. Each tree contains as its root an expression that selects
 * nodes without any dependency on the context. The arcs in the tree are axis steps.
 * So the expression doc('a.xml')/a[b=2]/c has a single root (the call on doc()), with
 * a single arc representing child::a, this leads to a node which has two further arcs
 * representing child::b and child::c. Because element b is atomized, there will also be
 * an arc for the step descendant::text() indicating the requirement to access the text
 * nodes of the element.
 *
 * <p>The current implementation works only for XPath 2.0 expressions (for example, constructs
 * like xsl:for-each-group are not handled.)</p>
 *
 * <p>This class, together with the overloaded method
 * {@link net.sf.saxon.expr.Expression#addToPathMap(PathMap, PathMap.PathMapNodeSet)} can be
 * seen as an implementation of the static path analysis algorithm given in section 4 of
 * A. Marian and J. Simeon,
 * Projecting XML Documents, VLDB 2003.</p>

*/ public class PathMap { /* NOTE(review): this copy of the source appears to have been flattened by extraction: many declarations share one physical line, so each "//" comment also hides whatever follows it on that line, and generic type arguments (plus parts of three loops further down, each flagged where it occurs) appear to be missing. Compare with the original source before compiling. */ /*@NotNull*/ private List pathMapRoots = new ArrayList(); /*@NotNull*/ private HashMap pathsForVariables = new HashMap(); // a map from a variable Binding to a PathMapNodeSet /** * A node in the path map. A node holds a set of arcs, each representing a link to another * node in the path map. */ public static class PathMapNode { List arcs; private boolean returnable; private boolean atomized; private boolean hasUnknownDependencies; /** * Create a node in the PathMap (initially with no arcs) */ private PathMapNode() { arcs = new ArrayList(); } /** * Create a new arc, or reuse an existing arc from this node that has the same axis and an equal node test * @param axis the axis of this step * @param test the node test of this step * @return the target of the arc: newly constructed, unless a matching arc already existed, in which case that arc's target is returned */ public PathMapNode createArc(byte axis, /*@NotNull*/ NodeTest test) { for (PathMapArc a : arcs) { if (a.getAxis() == axis && a.getNodeTest().equals(test)) { return a.getTarget(); } } PathMapNode target = new PathMapNode(); PathMapArc arc = new PathMapArc(axis, test, target); arcs.add(arc); return target; } /** * Create a new arc to an existing target; if this node already has an arc with the same axis, an equal node test and that same target, no further arc is added * @param axis the axis of this step * @param test the node test of this step * @param target the target node of the new arc */ public void createArc(byte axis, /*@NotNull*/ NodeTest test, /*@NotNull*/ PathMapNode target) { for (PathMapArc a : arcs) { if (a.getAxis() == axis && a.getNodeTest().equals(test) && a.getTarget() == target) { // TODO: if it's a different target, then merge the two targets into one. XMark Q8 a.getTarget().setReturnable(a.getTarget().isReturnable() || target.isReturnable()); if (target.isAtomized()) { a.getTarget().setAtomized(); } return; } } PathMapArc arc = new PathMapArc(axis, test, target); arcs.add(arc); } /** * Get the arcs emanating from this node in the PathMap * @return the arcs, each representing an AxisStep. The order of arcs in the array is undefined. 
*/ public PathMapArc[] getArcs() { return arcs.toArray(new PathMapArc[arcs.size()]); } /** * Indicate that the node represents a value that is returnable as the result of the * supplied expression, rather than merely a node that is visited en route * @param returnable true if the node represents a final result of the expression */ public void setReturnable(boolean returnable) { this.returnable = returnable; } /** * Ask whether the node represents a value that is returnable as the result of the * supplied expression, rather than merely a node that is visited en route * @return true if the node represents a final result of the expression */ public boolean isReturnable() { return returnable; } /** * Test whether there are any returnable nodes reachable from this node by * zero or more arcs * @return true if this node is returnable, or if any arcs lead to a pathmap node representing a returnable XDM node */ public boolean hasReachableReturnables() { if (isReturnable()) { return true; } for (PathMapArc arc : arcs) { if (arc.getTarget().hasReachableReturnables()) { return true; } } return false; } /** * Indicate that the typed value or string value of the node reached by this path * will be used. Note that because this is being used only to determine navigation paths, * the property does not need to be set when nodes other than element or document nodes * are atomized. */ public void setAtomized() { this.atomized = true; } /** * Ask whether the typed value (or string value) of the node reached by this path * will be required. 
* @return true if the typed value or string value of the node is required */ public boolean isAtomized() { return atomized; } /** * Indicate that the path has unknown dependencies, typically because a node reached * by the path is supplied as an argument to a user-defined function */ public void setHasUnknownDependencies() { hasUnknownDependencies = true; } /** * Ask whether the path has unknown dependencies, typically because a node reached * by the path is supplied as an argument to a user-defined function * @return true if the path has unknown dependencies */ public boolean hasUnknownDependencies() { return hasUnknownDependencies; } /** * Determine whether the path is entirely within a streamable snapshot of a streamed document: * that is, it must only navigate to ancestors and to attributes of ancestors * @return true if no navigation outside the permitted axes (self, parent, ancestor, ancestor-or-self, attribute) is found and none of the nodes examined is returnable; false otherwise (including when this node is atomized or has unknown dependencies) */ public boolean allPathsAreWithinStreamableSnapshot() { if (hasUnknownDependencies() || isReturnable() || isAtomized()) { return false; } for (PathMapArc arc : arcs) { int axis = arc.getAxis(); if (axis == AxisInfo.ATTRIBUTE) { PathMapNode next = arc.getTarget(); if (next.isReturnable()) { return false; } if (next.getArcs().length != 0 && !next.allPathsAreWithinStreamableSnapshot()) { return false; } } else if (axis == AxisInfo.SELF || axis == AxisInfo.ANCESTOR || axis == AxisInfo.ANCESTOR_OR_SELF || axis == AxisInfo.PARENT) { PathMapNode next = arc.getTarget(); if (next.isAtomized()) { return false; } if (!next.allPathsAreWithinStreamableSnapshot()) { return false; } } else { return false; } } return true; } } /** * A root node in the path map. A root node represents either (a) a subexpression that is the first step in * a path expression, or (b) a subexpression that is not the first step in a path, but which returns nodes * (for example, a call on the doc() function). 
*/ public static class PathMapRoot extends PathMapNode { private Expression rootExpression; /* set to true by reduceToDownwardsAxes once this root has been reduced; a root with the flag set is returned unchanged by that method */ private boolean isDownwardsOnly; /** * Create a PathMapRoot * @param root the expression at the root of a path */ private PathMapRoot(Expression root) { this.rootExpression = root; } /** * Get the root expression * @return the expression at the root of the path */ public Expression getRootExpression() { return rootExpression; } } /** * An arc joining two nodes in the path map. The arc has a target (destination) node, and is * labelled with the axis and node test of a step in a path expression */ public static class PathMapArc { private PathMapNode target; private byte axis; private NodeTest test; /** * Create a PathMapArc * @param axis the axis (a constant from class {@link net.sf.saxon.om.AxisInfo}) * @param test the node test * @param target the node reached by following this arc */ private PathMapArc(byte axis, /*@NotNull*/ NodeTest test, /*@NotNull*/ PathMapNode target) { this.axis = axis; this.test = test; this.target = target; } /** * Get the Axis associated with this arc * @return the axis, a constant from class {@link net.sf.saxon.om.AxisInfo} */ public byte getAxis() { return axis; } /** * Get the NodeTest associated with this arc * @return the NodeTest */ public NodeTest getNodeTest() { return test; } /** * Get the target node representing the destination of this arc * @return the target node */ public PathMapNode getTarget() { return target; } } /** * A (mutable) set of nodes in the path map */ public static class PathMapNodeSet extends HashSet { /** * Create an initially-empty set of path map nodes */ public PathMapNodeSet() {} /** * Create a set of path map nodes that initially contains a single node * @param singleton the single node to act as the initial content */ public PathMapNodeSet(PathMapNode singleton) { add(singleton); } /** * Create an arc from each node in this node set to a corresponding newly-created * target node * @param axis the axis of the 
step defining the transition * @param test the node test of the step defining the transition * @return the set of new target nodes */ /*@NotNull*/ public PathMapNodeSet createArc(byte axis, /*@NotNull*/ NodeTest test) { PathMapNodeSet targetSet = new PathMapNodeSet(); for (PathMapNode node : this) { targetSet.add(node.createArc(axis, test)); } return targetSet; } /** * Combine two node sets into one * @param nodes the set of nodes to be added to this set; may be null, in which case nothing is added */ public void addNodeSet(/*@Nullable*/ PathMapNodeSet nodes) { if (nodes != null) { for (PathMapNode node : nodes) { add(node); } } } /** * Set the atomized property on all nodes in this nodeset */ public void setAtomized() { for (PathMapNode node : this) { node.setAtomized(); } } /** * Set the returnable property on all nodes in this nodeset * @param isReturned the value to which the returnable property of every node is set */ public void setReturnable(boolean isReturned) { for (PathMapNode node : this) { node.setReturnable(isReturned); } } /** * Test whether there are any returnable nodes reachable from nodes in this nodeset * @return true if at least one node in this nodeset has reachable returnables; false otherwise, including when the nodeset is empty */ public boolean hasReachableReturnables() { for (PathMapNode node : this) { if (node.hasReachableReturnables()) { return true; } } return false; } /** * Determine whether the path is entirely within a streamable snapshot of a streamed document: * that is, it must only navigate to ancestors and to attributes of ancestors * @return true if the same test succeeds for every node in this nodeset (and therefore also when the nodeset is empty) */ public boolean allPathsAreWithinStreamableSnapshot() { for (PathMapNode node : this) { if (!node.allPathsAreWithinStreamableSnapshot()) { return false; } } return true; } /** * Indicate that all the descendants of the nodes in this nodeset are required, by giving each node an arc on the descendant axis with the AnyNodeTest node test */ public void addDescendants() { for (PathMapNode node : this) { node.createArc(AxisInfo.DESCENDANT, AnyNodeTest.getInstance()); } } /** * Indicate that all the nodes have unknown dependencies */ public void setHasUnknownDependencies() { for (PathMapNode node : this) { node.setHasUnknownDependencies(); } } } /** * Create the PathMap for an expression. The nodes returned by the expression's addToPathMap call, if any, are marked as returnable. * @param exp the expression whose PathMap is required 
*/ public PathMap(/*@NotNull*/ Expression exp) { PathMapNodeSet finalNodes = exp.addToPathMap(this, null); if (finalNodes != null) { for (PathMapNode node : finalNodes) { node.setReturnable(true); } } } /** * Make a new root node in the path map. However, if there is already a root for the same * expression, the existing root for that expression is returned. * @param exp the expression represented by this root node * @return the new root node, or the existing root whose expression is equal to the supplied one */ public PathMapRoot makeNewRoot(/*@NotNull*/ Expression exp) { for (PathMapRoot r : pathMapRoots) { if (exp.equals(r.getRootExpression())) { return r; } } PathMapRoot root = new PathMapRoot(exp); pathMapRoots.add(root); return root; } /** * Get all the root expressions from the path map * @return an array containing the root expressions */ public PathMapRoot[] getPathMapRoots() { return pathMapRoots.toArray(new PathMapRoot[pathMapRoots.size()]); } /** * Register the path used when evaluating a given variable binding * @param binding the variable binding * @param nodeset the set of PathMap nodes reachable when evaluating that variable */ public void registerPathForVariable(Binding binding, PathMapNodeSet nodeset) { pathsForVariables.put(binding, nodeset); } /** * Get the path used when evaluating a given variable binding * @param binding the variable binding * @return the set of PathMap nodes reachable when evaluating that variable, or null if nothing has been registered for the binding */ public PathMapNodeSet getPathForVariable(Binding binding) { return pathsForVariables.get(binding); } /** * Get the path map root for the context document. Each root is first passed through * {@link #reduceToDownwardsAxes(PathMap.PathMapRoot)}, so calling this method may modify the path map. * @return the path map root for the context document if there is one, or null if none is found. 
* @throws IllegalStateException if there is more than one path map root for the context document */ /*@Nullable*/ public PathMapRoot getContextDocumentRoot() { //System.err.println("BEFORE REDUCTION:"); //map.diagnosticDump(System.err); PathMap.PathMapRoot[] roots = getPathMapRoots(); PathMapRoot contextRoot = null; for (PathMapRoot root : roots) { PathMapRoot newRoot = reduceToDownwardsAxes(root); if (newRoot.getRootExpression() instanceof RootExpression) { if (contextRoot != null) { throw new IllegalStateException("More than one context document root found in path map"); } else { contextRoot = newRoot; } } } //System.err.println("AFTER REDUCTION:"); //map.diagnosticDump(System.err); return contextRoot; } /** * Get the path map root for the context item. Unlike the lookup for the context document, the roots are examined as they stand, without being reduced to downwards axes first. * NOTE(review): the exception message thrown below says "context document root" although this method searches for the context item root - confirm whether that wording is intended. * @return the path map root for the context item if there is one, or null if none is found. * @throws IllegalStateException if there is more than one path map root for the context item */ /*@Nullable*/ public PathMapRoot getContextItemRoot() { //System.err.println("BEFORE REDUCTION:"); //map.diagnosticDump(System.err); PathMap.PathMapRoot[] roots = getPathMapRoots(); PathMapRoot contextRoot = null; for (PathMapRoot root : roots) { if (root.getRootExpression() instanceof ContextItemExpression) { if (contextRoot != null) { throw new IllegalStateException("More than one context document root found in path map"); } else { contextRoot = root; } } } return contextRoot; } /** * Get the path map root for a call on the doc() or document() function with a given literal argument. * Each root is first reduced to downwards axes, so calling this method may modify the path map. * @param requiredUri the URI we are looking for; it is compared with the literal first argument of the call after that argument has been made absolute against the call's static base URI (when there is no static base URI, only a literal that is already an absolute URI can match) * @return the path map root for the specified document if there is one, or null if none is found. 
* @throws IllegalStateException if there is more than one path map root for the specified document */ /*@Nullable*/ public PathMapRoot getRootForDocument(/*@NotNull*/ String requiredUri) { //System.err.println("BEFORE REDUCTION:"); //map.diagnosticDump(System.err); PathMap.PathMapRoot[] roots = getPathMapRoots(); PathMapRoot requiredRoot = null; for (PathMapRoot root : roots) { PathMapRoot newRoot = reduceToDownwardsAxes(root); Expression exp = newRoot.getRootExpression(); String baseUri; if (exp instanceof Doc) { baseUri = ((Doc) exp).getStaticBaseURI(); } else if (exp instanceof DocumentFn) { baseUri = ((DocumentFn) exp).getStaticBaseURI(); } else { continue; } Expression arg = ((SystemFunctionCall) exp).getArguments()[0]; String suppliedUri = null; if (arg instanceof Literal) { try { String argValue = ((Literal) arg).getValue().getStringValue(); if (baseUri == null) { if (new URI(argValue).isAbsolute()) { suppliedUri = argValue; } else { suppliedUri = null; } } else { suppliedUri = ResolveURI.makeAbsolute(argValue, baseUri).toString(); } } catch (URISyntaxException err) { suppliedUri = null; } catch (XPathException err) { suppliedUri = null; } } if (requiredUri.equals(suppliedUri)) { if (requiredRoot != null) { throw new IllegalStateException("More than one document root found in path map for " + requiredUri); } else { requiredRoot = newRoot; } } } //System.err.println("AFTER REDUCTION:"); //map.diagnosticDump(System.err); return requiredRoot; } /** * Given a PathMapRoot, simplify the tree rooted at this node so that * it only contains downwards selections: specifically, so that the only axes * used are child, attribute, namespace, and descendant. If the root expression * is a ContextItemExpression (that is, the path can start at any node) then it is rebased * to start at a root node, which means in effect that a path such as a/b/c is treated * as //a/b/c. A root that has already been reduced is returned unchanged. 
* @param root the root of the path to be simplified * @return the path map root after converting the tree to use downwards axes only */ public PathMapRoot reduceToDownwardsAxes(/*@NotNull*/ PathMapRoot root) { // If the path is rooted at an arbitrary context node, we rebase it to be rooted at the // document root. This involves changing the root to a RootExpression, and changing the axis // for initial steps from child to descendant where necessary if (root.isDownwardsOnly) { return root; } PathMapRoot newRoot = root; if (root.getRootExpression() instanceof ContextItemExpression) { RootExpression slash = new RootExpression(); slash.setContainer(root.getRootExpression().getContainer()); //root.setRootExpression(slash); newRoot = makeNewRoot(slash); for (int i=root.arcs.size()-1; i>=0; i--) { PathMapArc arc = root.arcs.get(i); byte axis = arc.getAxis(); switch (axis) { case AxisInfo.ATTRIBUTE: case AxisInfo.NAMESPACE: { PathMapNode newTarget = new PathMapNode(); newTarget.arcs.add(arc); newRoot.createArc(AxisInfo.DESCENDANT, NodeKindTest.ELEMENT, newTarget); break; } default: { newRoot.createArc(AxisInfo.DESCENDANT_OR_SELF, arc.getNodeTest(), arc.getTarget()); break; } } } /* NOTE(review): the loop header that follows is truncated - the text between "i" and "nodeStack" (the rest of this loop, the end of the enclosing if-block and the start of the nodeStack declaration) appears to have been lost in extraction and must be restored from the original source */ for (int i=0; i nodeStack = new Stack(); nodeStack.push(newRoot); reduceToDownwardsAxes(newRoot, nodeStack); newRoot.isDownwardsOnly = true; return newRoot; } /** * Supporting method for {@link #reduceToDownwardsAxes(PathMap.PathMapRoot)} * @param root the root of the path being simplified * @param nodeStack the sequence of nodes by which the current node in the path map was reached. * The node at the bottom of the stack is the root. 
*/ private void reduceToDownwardsAxes(/*@NotNull*/ PathMapRoot root, /*@NotNull*/ Stack nodeStack) { //PathMapArc lastArc = (PathMapArc)arcStack.peek(); //byte lastAxis = lastArc.getStep().getAxis(); PathMapNode node = nodeStack.peek(); if (node.hasUnknownDependencies()) { root.setHasUnknownDependencies(); } /* NOTE(review): the loop header that follows is truncated - it starts as an ascending loop and ends as a descending one, so the text in between (presumably the body of a first loop and the start of a second) appears to have been lost in extraction and must be restored from the original source */ for (int i=0; i=0; i--) { PathMapArc thisArc = node.arcs.get(i); //AxisExpression axisStep = thisArc.getStep(); PathMapNode grandParent = (nodeStack.size() < 2 ? null : nodeStack.get(nodeStack.size()-2)); byte lastAxis = -1; if (grandParent != null) { for (PathMapArc arc1 : grandParent.arcs) { PathMapArc arc = (arc1); if (arc.getTarget() == node) { lastAxis = arc.getAxis(); } } } switch (thisArc.getAxis()) { case AxisInfo.ANCESTOR_OR_SELF: case AxisInfo.DESCENDANT_OR_SELF: if (thisArc.getNodeTest() == NodeKindTest.DOCUMENT) { // This is typically an absolute path expression appearing within a predicate node.arcs.remove(i); for (PathMapArc arc : thisArc.getTarget().arcs) { root.arcs.add(arc); } break; } else { // fall through } case AxisInfo.ANCESTOR: case AxisInfo.FOLLOWING: case AxisInfo.PRECEDING: { // replace the axis by a downwards axis from the root if (thisArc.getAxis() != AxisInfo.DESCENDANT_OR_SELF) { root.createArc(AxisInfo.DESCENDANT_OR_SELF, thisArc.getNodeTest(), thisArc.getTarget()); node.arcs.remove(i); } break; } case AxisInfo.ATTRIBUTE: case AxisInfo.CHILD: case AxisInfo.DESCENDANT: case AxisInfo.NAMESPACE: // no action break; case AxisInfo.FOLLOWING_SIBLING: case AxisInfo.PRECEDING_SIBLING: { if (grandParent != null) { grandParent.createArc(lastAxis, thisArc.getNodeTest(), thisArc.getTarget()); node.arcs.remove(i); break; } else { root.createArc(AxisInfo.CHILD, thisArc.getNodeTest(), thisArc.getTarget()); node.arcs.remove(i); break; } } case AxisInfo.PARENT: { if (lastAxis == AxisInfo.CHILD || lastAxis == AxisInfo.ATTRIBUTE || lastAxis == AxisInfo.NAMESPACE) { // ignore the parent step - it leads to somewhere we have already been. 
// But it might become a returned node if (node.isReturnable()) { grandParent.setReturnable(true); } // any paths after the parent step need to be attached to the grandparent PathMapNode target = thisArc.getTarget(); /* NOTE(review): the loop header that follows is truncated - the text between "a" and "arcs" appears to have been lost in extraction. The gap seems to cover the rest of this method and the declarations of diagnosticDump (called from main below) and showArcs (called recursively below), since the surviving statements use the otherwise undeclared names out, pad and indent. Restore from the original source. */ for (int a=0; a arcs = node.arcs; for (PathMapArc arc : arcs) { out.println(pad + AxisInfo.axisName[arc.axis] + "::" + arc.test.toString() + (arc.target.isAtomized() ? " @" : "") + (arc.target.isReturnable() ? " #" : "") + (arc.target.hasUnknownDependencies() ? " ...??" : "")); showArcs(out, arc.target, indent + 2); } } /** * Main method for testing. It explains the expression on System.err, dumps its path map, reduces every root to downwards axes and then dumps the path map again. * NOTE(review): in the "xquery" case the FileReader opened on args[1] is not closed by this method. * @param args Takes two arguments: args[0] must be "xpath" or "xquery"; args[1] is the XPath expression to be analyzed (for "xpath") or the name of the file containing the query to be analyzed (for "xquery") * @throws Exception if anything fails; the method declares Exception and does no error handling of its own * @throws IllegalArgumentException if the first argument is neither "xpath" nor "xquery" */ public static void main(String[] args) throws Exception { Configuration config = new Configuration(); Expression exp; if (args[0].equals("xpath")) { XPathEvaluator xpath = new XPathEvaluator(config); XPathExpression xpexp = xpath.createExpression(args[1]); exp = xpexp.getInternalExpression(); } else if (args[0].equals("xquery")) { StaticQueryContext sqc = config.newStaticQueryContext(); sqc.setBaseURI(new File(args[1]).toURI().toString()); XQueryExpression xqe = sqc.compileQuery(new FileReader(args[1])); exp = xqe.getExpression(); } else { throw new IllegalArgumentException("first argument must be xpath or xquery"); } exp.explain(System.err); PathMap initialPath = new PathMap(exp); initialPath.diagnosticDump(System.err); PathMapRoot[] roots = initialPath.getPathMapRoots(); for (PathMapRoot root : roots) { initialPath.reduceToDownwardsAxes(root); } System.err.println("AFTER REDUCTION:"); initialPath.diagnosticDump(System.err); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy