All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.functions.DocumentFn Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.functions;

import net.sf.saxon.Configuration;
import net.sf.saxon.Controller;
import net.sf.saxon.event.*;
import net.sf.saxon.expr.*;
import net.sf.saxon.expr.parser.PathMap;
import net.sf.saxon.expr.sort.DocumentOrderIterator;
import net.sf.saxon.expr.sort.GlobalOrderComparer;
import net.sf.saxon.lib.*;
import net.sf.saxon.om.*;
import net.sf.saxon.s9api.Location;
import net.sf.saxon.style.StylesheetPackage;
import net.sf.saxon.trans.Err;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.trans.XsltController;
import net.sf.saxon.tree.tiny.TinyBuilder;
import net.sf.saxon.value.Cardinality;

import javax.xml.transform.Source;
import javax.xml.transform.SourceLocator;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMSource;
import java.net.URI;
import java.net.URISyntaxException;


/**
 * Implements the XSLT document() function
 */

public class DocumentFn extends SystemFunction implements Callable {

    private Location location;

    /**
     * Determine the static cardinality
     */

    @Override
    public int getCardinality(Expression[] arguments) {
        Expression expression = arguments[0];
        if (Cardinality.allowsMany(expression.getCardinality())) {
            return StaticProperty.ALLOWS_ZERO_OR_MORE;
        } else {
            return StaticProperty.ALLOWS_ZERO_OR_ONE;
        }
        // may have to revise this if the argument can be a list-valued element or attribute
    }

    /**
     * Get the static properties of this expression (other than its type). The result is
     * bit-signficant. These properties are used for optimizations. In general, if
     * property bit is set, it is true, but if it is unset, the value is unknown.
     * @param arguments the expressions supplied as arguments to the function call
     */

    @Override
    public int getSpecialProperties(Expression[] arguments) {
        return StaticProperty.ORDERED_NODESET |
                StaticProperty.PEER_NODESET |
                StaticProperty.NO_NODES_NEWLY_CREATED;
        // Declaring it as a peer node-set expression avoids sorting of expressions such as
        // document(XXX)/a/b/c
        // The document() function might appear to be creative: but it isn't, because multiple calls
        // with the same arguments will produce identical results.
    }

    @Override
    public Expression makeFunctionCall(Expression... arguments) {
        location = arguments[0].getLocation();
        Expression expr = Doc.maybePreEvaluate(this, arguments);
        return expr == null ? super.makeFunctionCall(arguments) : expr;
    }

    @SuppressWarnings("RedundantIfStatement")
    public static boolean sourceIsTree(Source source) {
        if (source instanceof NodeSource) {
            return true;
        }
        if (source instanceof NodeInfo) {
            return true;
        }
        if (source instanceof DOMSource) {
            return true;
        }
        return false;
    }

    /**
     * Evaluate the expression
     *
     * @param context   the dynamic evaluation context
     * @param arguments the values of the arguments, supplied as SequenceIterators
     * @return the result of the evaluation, in the form of a SequenceIterator
     * @throws net.sf.saxon.trans.XPathException
     *          if a dynamic error occurs during the evaluation of the expression
     */
    @Override
    public Sequence call(XPathContext context, Sequence[] arguments) throws XPathException {
        int numArgs = getArity();

        SequenceIterator hrefSequence = arguments[0].iterate();
        String baseURI = null;
        if (numArgs == 2) {
            // we can trust the type checking: it must be a node
            NodeInfo base = (NodeInfo) arguments[1].head();
            baseURI = base.getBaseURI();
            if (baseURI == null) {
                throw new XPathException("The second argument to document() is a node with no base URI", "XTDE1162");
            }
        }

        DocumentMappingFunction map = new DocumentMappingFunction(context);
        map.baseURI = baseURI;
        map.stylesheetURI = getStaticBaseUriString();
        map.packageData = getRetainedStaticContext().getPackageData();
        map.locator = location;

        ItemMappingIterator iter = new ItemMappingIterator(hrefSequence, map);

        //if (Cardinality.allowsMany(expression.getCardinality())) {
            return SequenceTool.toLazySequence(new DocumentOrderIterator(iter, GlobalOrderComparer.getInstance()));
            // this is to make sure we eliminate duplicates: two href's might be the same
//        } else {
//            return SequenceTool.toLazySequence(iter);
//        }
    }

    private static class DocumentMappingFunction implements ItemMappingFunction {

        public String baseURI;
        public String stylesheetURI;
        public Location locator;
        public PackageData packageData;
        public XPathContext context;

        public DocumentMappingFunction(XPathContext context) {
            this.context = context;
        }

        @Override
        public Item mapItem(Item item) throws XPathException {
            String b = baseURI;
            if (b == null) {
                if (item instanceof NodeInfo) {
                    b = ((NodeInfo) item).getBaseURI();
                } else {
                    b = stylesheetURI;
                }
            }
            return makeDoc(item.getStringValue(), b, packageData, null, context, locator, false);
        }
    }

    /**
     * Supporting routine to load one external document given a URI (href) and a baseURI. This is used
     * in the normal case when a document is loaded at run-time (that is, when a Controller is available)
     *
     *
     * @param href    the relative URI
     * @param baseURI the base URI
     * @param packageData the stylesheet (or other) package in which the call appears
     * @param options parse options to be used. May be null.
     * @param c       the dynamic XPath context
     * @param locator used to identify the location of the instruction in event of error
     * @param silent  if true, errors should not be notified to the ErrorListener
     * @return the root of the constructed document, or the selected element within the document
     *         if a fragment identifier was supplied, or null if the URIResolver returns an
     *         instance of {@link EmptySource};
     * @throws XPathException if reading or parsing the document fails
     */

    public static NodeInfo makeDoc(String href, String baseURI, PackageData packageData, ParseOptions options, XPathContext c, Location locator, boolean silent)
            throws XPathException {

        Configuration config = c.getConfiguration();

        // If the href contains a fragment identifier, strip it out now
        String[] parts = extractFragment(href);
        String fragmentId = parts[1];
        href = parts[0];

        // Extract any query part
        URIQueryParameters params = null;
        if (config.getBooleanProperty(Feature.RECOGNIZE_URI_QUERY_PARAMETERS)) {
            int qMark = href.indexOf('?');
            if (qMark >= 0) {
                params = new URIQueryParameters(href.substring(qMark+1), config);
                href = href.substring(0, qMark);
            }
        }

        Controller controller = c.getController();
        if (controller == null) {
            throw new XPathException("doc() function is not available in this environment");
        }

        // Resolve relative URI
        DocumentKey documentKey = computeDocumentKey(href, baseURI, packageData, c);

        // see if the document is already loaded

        TreeInfo doc = config.getGlobalDocumentPool().find(documentKey);
        if (doc != null) {
            return doc.getRootNode();
        }

        DocumentPool pool = controller.getDocumentPool();

        //noinspection SynchronizationOnLocalVariableOrMethodParameter
        synchronized (controller) {
            doc = pool.find(documentKey);
            if (doc != null) {
                return getFragment(doc, fragmentId, c, locator);
            }

            // check that the document was not written by this transformation

            if (controller instanceof XsltController &&
                    !((XsltController)controller).checkUniqueOutputDestination(documentKey)) {
                pool.markUnavailable(documentKey);
                XPathException err = new XPathException(
                        "Cannot read a document that was written during the same transformation: " + documentKey);
                err.setXPathContext(c);
                err.setErrorCode("XTRE1500");
                err.setLocator(locator);
                throw err;
            }

            if (pool.isMarkedUnavailable(documentKey)) {
                XPathException err = new XPathException(
                        "Document has been marked not available: " + documentKey);
                err.setXPathContext(c);
                err.setErrorCode("FODC0002");
                err.setLocator(locator);
                throw err;
            }
        }

        try {

            // Get a Source from the URIResolver

            Source source = resolveURI(href, baseURI, documentKey.toString(), c);

            if (source == null || source instanceof EmptySource) {
                return null;
            }

            //System.err.println("URI resolver returned " + source.getClass() + " " + source.getSystemId());
            source = config.getSourceResolver().resolveSource(source, config);
            //System.err.println("Resolved source " + source.getClass() + " " + source.getSystemId());

            TreeInfo newdoc;
            if (sourceIsTree(source)) {
                NodeInfo startNode = controller.prepareInputTree(source);
                newdoc = startNode.getTreeInfo();
            } else {
                Builder b = controller.makeBuilder();
                b.setDurability(Durability.LASTING);
                b.setUseEventLocation(true);
                if (b instanceof TinyBuilder) {
                    ((TinyBuilder) b).setStatistics(config.getTreeStatistics().SOURCE_DOCUMENT_STATISTICS);
                }

                Receiver s = b;
                if (options == null) {
                    options = b.getPipelineConfiguration().getParseOptions();
                    if (packageData instanceof StylesheetPackage) {
                        SpaceStrippingRule rule = ((StylesheetPackage)packageData).getSpaceStrippingRule();
                        if (rule != NoElementsSpaceStrippingRule.getInstance()) {
                            options = options.withSpaceStrippingRule(rule);
                        }
                    }
                    options = options.withSchemaValidationMode(controller.getSchemaValidationMode());
                }

                if (params != null) {
                    options = options.merge(params.makeParseOptions(config));
                }
                b.getPipelineConfiguration().setParseOptions(options);

                if (options.isLineNumbering()) {
                    b.setLineNumbering(true);
                }
                if (silent) {
                    StandardErrorHandler eh = new StandardErrorHandler(controller.getErrorReporter());
                    eh.setSilent(true);
                    options = options.withErrorHandler(eh);
                }
                if (packageData instanceof StylesheetPackage && ((StylesheetPackage)packageData).isStripsTypeAnnotations()) {
                    s = config.getAnnotationStripper(s);
                }

                PathMap map = controller.getPathMapForDocumentProjection();
                if (map != null) {
                    PathMap.PathMapRoot pathRoot = map.getRootForDocument(documentKey.toString());
                    if (pathRoot != null && !pathRoot.isReturnable() && !pathRoot.hasUnknownDependencies()) {
                        options = options.withFilter(config.makeDocumentProjector(pathRoot));
                    }
                }
                s.setPipelineConfiguration(b.getPipelineConfiguration());
                try {
                    Sender.send(source, s, options);
                    newdoc = b.getCurrentRoot().getTreeInfo();
                    b.reset();
                } catch (XPathException err) {
                    if (err.getErrorCodeLocalPart() == null || err.getErrorCodeLocalPart().equals("SXXP0003")) {
                        err.setErrorCode("FODC0002");
                    }
                    err.maybeSetLocation(locator);
                    err.maybeSetContext(c);
                    throw err;
                } finally {
                    if (options.isPleaseCloseAfterUse()) {
                        ParseOptions.close(source);
                    }
                }
            }

            // At this point, we have built the document. But it's possible that another thread
            // has built the same document and put it in the document pool. So we do another
            // check on the document pool, and if this has happened, we discard the document
            // we have just built and use the one from the pool instead.
            //noinspection SynchronizationOnLocalVariableOrMethodParameter
            synchronized (controller) {
                doc = pool.find(documentKey);
                if (doc != null) {
                    return getFragment(doc, fragmentId, c, locator);
                }
                controller.registerDocument(newdoc, documentKey);
                if (controller instanceof XsltController) {
                    ((XsltController) controller).addUnavailableOutputDestination(documentKey);
                }
            }
            return getFragment(newdoc, fragmentId, c, locator);

        } catch (TransformerException err) {
            pool.markUnavailable(documentKey);
            XPathException xerr = XPathException.makeXPathException(err);
            xerr.maybeSetLocation(locator);
            String code = (err.getException() instanceof URISyntaxException) ? "FODC0005" : "FODC0002";
            xerr.maybeSetErrorCode(code);
            throw xerr;
        }
    }

    private static String[] extractFragment(String href) throws XPathException {
        // If the href contains a fragment identifier, strip it out now
        //System.err.println("Entering makeDoc " + href);
        int hash = href.indexOf('#');

        String fragmentId = null;
        if (hash >= 0) {
            if (hash == href.length() - 1) {
                // If there's a # sign at end - just ignore it
                href = href.substring(0, hash);
            } else {
                fragmentId = href.substring(hash + 1);
                href = href.substring(0, hash);
                if (!NameChecker.isValidNCName(fragmentId)) {
                    XPathException de = new XPathException("The fragment identifier " + Err.wrap(fragmentId) + " is not a valid NCName");
                    de.setErrorCode("XTDE1160");
                    throw de;
                }
            }
        }
        return new String[]{href, fragmentId};
    }

    /**
     * Call the URIResolver to resolve a URI
     *
     * @param href        the supplied relative URI, stripped of any fragment identifier
     * @param baseURI     the base URI
     * @param documentKey the absolute URI if already available, or null otherwise
     * @param context  the dynamic context
     * @return a Source representing the document to be read
     * @throws XPathException if the relative URI cannot be resolved
     */

    public static Source resolveURI(String href, String baseURI, String documentKey, XPathContext context)
            throws XPathException {
        Configuration config = context.getConfiguration();
        ResourceResolver resolver = context.getResourceResolver();
        if (href.contains(" ")) {
            href = ResolveURI.escapeSpaces(href);
        }

        if (baseURI == null) {
            try {
                URI uri = new URI(href);
                if (!uri.isAbsolute()) {
                    throw new XPathException("Relative URI passed to document() function (" + href +
                            "); but no base URI is available", "XTDE1162");
                }
            } catch (URISyntaxException e) {
                throw new XPathException("Invalid URI passed to document() function: " + href, "FODC0005");
            }
        }

        ResourceRequest request = new ResourceRequest();
        request.relativeUri = href;
        if (baseURI != null) {
            request.baseUri = baseURI;
            request.uri = documentKey;
        } else {
            request.uri = href;
        }
        request.nature = ResourceRequest.XML_NATURE;
        request.purpose = ResourceRequest.ANY_PURPOSE;
        try {
            return request.resolve(resolver, config.getResourceResolver(), new DirectResourceResolver(config));
        } catch (XPathException err) {
            err.setErrorCode("FODC0005");
            err.maybeSetContext(context);
            throw err;
        } catch (Exception ex) {
            XPathException de = new XPathException("Exception thrown by URIResolver resolving `"
                                                           + href + "` against `" + baseURI + "'", ex);
            if (config.getBooleanProperty(Feature.TRACE_EXTERNAL_FUNCTIONS)) {
                ex.printStackTrace();
            }
            throw de;
        }
    }

    /**
     * Compute a document key
     */

    protected static DocumentKey computeDocumentKey(String href, String baseURI, PackageData packageData, XPathContext c) throws XPathException {
        return computeDocumentKey(href, baseURI, packageData, true);
    }

    /**
     * Compute a document key (an absolute URI that can be used to see if a document is already loaded)
     * @param href     the relative URI
     * @param baseURI  the base URI
     * @param packageData the package in which the call to doc() or document() appears (affects options
 *                    such as strip-space)
     * @param strip true if the document is subject to whitespace stripping (typically a source document),
     *              otherwise false
     */
    public static DocumentKey computeDocumentKey(
            String href, String baseURI, PackageData packageData, boolean strip) {
        String absURI;

        // Saxon takes charge of absolutization, leaving the user URIResolver to handle dereferencing only
        href = ResolveURI.escapeSpaces(href);
        if (baseURI == null) {    // no base URI available
            try {
                // the href might be an absolute URL
                absURI = new URI(href).toString();
            } catch (URISyntaxException err) {
                // it isn't; but the URI resolver might know how to cope
                absURI = '/' + href;
            }
        } else if (href.isEmpty()) {
            // common case in XSLT, which java.net.URI#resolve() does not handle correctly
            absURI = baseURI;
        } else {
            try {
                absURI = ResolveURI.makeAbsolute(href, baseURI).toString();
            } catch (URISyntaxException | IllegalArgumentException err) {
                absURI = baseURI + "/../" + href;
            }
        }

        if (strip && packageData instanceof StylesheetPackage &&
                ((StylesheetPackage) packageData).getSpaceStrippingRule() != NoElementsSpaceStrippingRule.getInstance()) {
            String name = ((StylesheetPackage) packageData).getPackageName();
            if (name != null) {
                return new DocumentKey(absURI, name, ((StylesheetPackage) packageData).getPackageVersion());
            }
        }
        return new DocumentKey(absURI);
    }

    /**
     * Supporting routine to load one external document given a URI (href) and a baseURI. This is used
     * when the document is pre-loaded at compile time.
     *
     * @param href    the relative URI. This must not contain a fragment identifier
     * @param baseURI the base URI
     * @param config  the Saxon configuration
     * @param locator used to identify the location of the instruction in event of error. May be null.
     * @return the root of the constructed document, or the selected element within the document
     *         if a fragment identifier was supplied
     */

    public static NodeInfo preLoadDoc(String href, String baseURI, PackageData packageData, Configuration config, SourceLocator locator)
            throws XPathException {

        int hash = href.indexOf('#');
        if (hash >= 0) {
            throw new XPathException("Fragment identifier not supported for preloaded documents");
        }

        // Extract any query part
        URIQueryParameters params = null;
        if (config.getBooleanProperty(Feature.RECOGNIZE_URI_QUERY_PARAMETERS)) {
            int qMark = href.indexOf('?');
            if (qMark >= 0) {
                params = new URIQueryParameters(href.substring(qMark + 1), config);
                href = href.substring(0, qMark);
            }
        }

        DocumentKey documentKey = computeDocumentKey(href, baseURI, packageData, true);

        // see if the document is already loaded

        TreeInfo doc = config.getGlobalDocumentPool().find(documentKey);
        if (doc != null) {
            return doc.getRootNode();
        }

        ResourceRequest rr = new ResourceRequest();
        rr.relativeUri = href;
        rr.baseUri = baseURI;
        rr.uri = documentKey.getAbsoluteURI();

        Source source;
        try {
            // Get a Source from the URIResolver
            source = rr.resolve(config.getResourceResolver(), new DirectResourceResolver(config));
        } catch (Exception ex) {
            XPathException de = new XPathException("Exception thrown by ResourceResolver", ex);
            if (config.getBooleanProperty(Feature.TRACE_EXTERNAL_FUNCTIONS)) {
                ex.printStackTrace();
            }
            de.setLocator(locator);
            throw de;
        }

        ParseOptions options = config.getParseOptions();
        if (params != null) {
            options = options.merge(params.makeParseOptions(config));
        }

        TreeInfo newdoc = config.buildDocumentTree(source, options);
        config.getGlobalDocumentPool().add(newdoc, documentKey);
        return newdoc.getRootNode();


    }



    /**
     * Supporting routine to push one external document given a URI (href) and a baseURI to a given Receiver.
     * This method cannot handle fragment identifiers
     *
     * @param href    the relative URI
     * @param baseURI the base URI
     * @param packageData the containing XSLT package
     * @param context the XPath dynamic context
     * @param locator used to identify the location of the instruction in case of error
     * @param out     the destination where the document is to be sent
     */

    public static void sendDoc(String href,
                               String baseURI,
                               PackageData packageData,
                               XPathContext context,
                               Location locator,
                               Receiver out,
                               ParseOptions parseOptions) throws XPathException {

        PipelineConfiguration pipe = out.getPipelineConfiguration();
        if (pipe == null) {
            pipe = context.getController().makePipelineConfiguration();
            pipe.setXPathContext(context);
            out.setPipelineConfiguration(pipe);
        }

        String[] parts = extractFragment(href);
        if (parts[1] != null) {
            href = parts[0];
            out = new IDFilter(out, parts[1]);
        }

        // Resolve relative URI

        DocumentKey documentKey = computeDocumentKey(href, baseURI, packageData, true);

        Controller controller = context.getController();
        Configuration config = controller.getConfiguration();

        // see if the document is already loaded

        TreeInfo doc = controller.getDocumentPool().find(documentKey);
        Source source = null;
        if (doc != null) {
            source = doc.getRootNode().asActiveSource();
        } else {

            try {
                // Get a Source from the resource resolver
                ResourceRequest request = new ResourceRequest();
                request.baseUri = baseURI;
                request.relativeUri = href;
                request.uri = documentKey.getAbsoluteURI();
                request.streamable = true;
                request.nature = ResourceRequest.XML_NATURE;
                request.purpose = ResourceRequest.ANY_PURPOSE;

                source = request.resolve(context.getResourceResolver(),
                                                config.getResourceResolver(),
                                                new DirectResourceResolver(config));
                if (source == null) {
                    XPathException xerr = new XPathException("Failed to resolve streamed document URI " + request.uri, "FODC0005");
                    xerr.setLocator(locator);
                    throw xerr;
                }
                if (sourceIsTree(source)) {
                    NodeInfo startNode = controller.prepareInputTree(source);
                    source = startNode.getRoot().asActiveSource();
                }
            } catch (TransformerException err) {
                XPathException xerr = XPathException.makeXPathException(err);
                xerr.setLocator(locator);
                xerr.maybeSetErrorCode("FODC0005");
                throw xerr;
            }
        }

        if (controller.getConfiguration().isTiming()) {
            controller.getConfiguration().getLogger().info("Streaming input document " + source.getSystemId());
        }
        out.setPipelineConfiguration(pipe);
        try {
            Sender.send(source, out, parseOptions);
        } catch (XPathException e) {
            e.maybeSetLocation(locator);
            e.maybeSetErrorCode("FODC0002");
            throw e;
        }
    }


    /**
     * Resolve the fragment identifier within a URI Reference.
     * Only "bare names" XPointers are recognized, that is, a fragment identifier
     * that matches an ID attribute value within the target document.
     *
     * @param doc        the document node
     * @param fragmentId the fragment identifier (an ID value within the document)
     * @param context    the XPath dynamic context
     * @return the element within the supplied document that matches the
     *         given id value; or null if no such element is found.
     */

    private static NodeInfo getFragment(TreeInfo doc, String fragmentId, XPathContext context, Location locator) {
        // TODO: we only support one kind of fragment identifier. The rules say
        // that the interpretation of the fragment identifier depends on media type,
        // but we aren't getting the media type from the URIResolver.
        if (fragmentId == null) {
            return doc.getRootNode();
        }
        if (!NameChecker.isValidNCName(fragmentId)) {
            context.getController().warning("Invalid fragment identifier in URI", "XTDE1160", locator);
            return doc.getRootNode();
        }
        return doc.selectID(fragmentId, false);
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy