All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.functions.ResolveURI Maven / Gradle / Ivy

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.functions;

import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.om.ZeroOrOne;
import net.sf.saxon.trans.Err;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.AnyURIValue;
import net.sf.saxon.value.AtomicValue;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

/**
 * This class supports the resolve-uri() function in XPath 2.0
 */

public class ResolveURI extends SystemFunction  {

    /**
     * Evaluate the expression
     *
     * @param context   the dynamic evaluation context
     * @param arguments the values of the arguments, supplied as SequenceIterators
     * @return the result of the evaluation, in the form of a SequenceIterator
     * @throws net.sf.saxon.trans.XPathException
     *          if a dynamic error occurs during the evaluation of the expression
     */
    /*@Nullable*/
    public ZeroOrOne call(XPathContext context, Sequence[] arguments) throws XPathException {
        AtomicValue arg0 = (AtomicValue) arguments[0].head();
        if (arg0 == null) {
            return ZeroOrOne.empty();
        }
        String relative = arg0.getStringValue();
        String base;
        if (getArity() == 2) {
            //noinspection ConstantConditions
            base = arguments[1].head().getStringValue();
        } else {
            base = getStaticBaseUriString();
            if (base == null) {
                throw new XPathException("Base URI in static context of resolve-uri() is unknown", "FONS0005", context);
            }
        }

        return new ZeroOrOne(resolve(base, relative, context));
    }

    /*@NotNull*/
    private AnyURIValue resolve(String base, String relative, XPathContext context) throws XPathException {
        try {

            // Rule 4: "The function resolves the relative IRI reference $relative against the base IRI $base using
            // the algorithm defined in [RFC 3986], adapted by treating any ·character· that would not be valid in
            // an RFC3986 URI or relative reference in the same way that RFC3986 treats unreserved characters.
            // No percent-encoding takes place.

            // We rely on the Java implementation, but the Java implementation will not handle invalid characters
            // notably spaces. If there are spaces present, we escape them to prevent Java objecting, and then unescape
            // them at the end. We accept the consequence that if the input contains both escaped and unescaped spaces,
            // they will all be unescaped at the end.

            boolean escaped = false;
            if (relative.contains(" ")) {
                relative = escapeSpaces(relative);
                escaped = true;
            }
            if (base.contains(" ")) {
                base = escapeSpaces(base);
                escaped = true;
            }

            URI relativeURI = new URI(relative);
            if (relativeURI.isAbsolute()) {
                return new AnyURIValue(relative);
            }

            URI absoluteURI = new URI(base);
            if (!absoluteURI.isAbsolute()) {
                throw new XPathException("Base URI " + Err.wrap(base) + " is not an absolute URI", "FORG0002", context);
            }
            if (absoluteURI.isOpaque() && !base.startsWith("jar:")) {
                // Special-case JAR file URLs, even though non-conformant
                throw new XPathException("Base URI " + Err.wrap(base) + " is a non-hierarchic URI", "FORG0002", context);
            }
            if (absoluteURI.getRawFragment() != null) {
                throw new XPathException("Base URI " + Err.wrap(base) + " contains a fragment identifier", "FORG0002", context);
            }
            if (absoluteURI.getPath() != null && absoluteURI.getPath().isEmpty()) {
                // This deals with cases like base=http://www.example.com - changing it to http://www.example.com/
                absoluteURI = new URI(absoluteURI.getScheme(), absoluteURI.getUserInfo(), absoluteURI.getHost(),
                    absoluteURI.getPort(), "/", absoluteURI.getQuery(), absoluteURI.getFragment());
                base = absoluteURI.toString();
            }
            URI resolved = makeAbsolute(relative, base);
            resolved = resolved.normalize();
            String result = escaped ? unescapeSpaces(resolved.toString()) : resolved.toString();

            // Test case XSLT3 resolve-uri-022. Java even after normalization can leave a URI with trailing "../" or ".." parts.
            // Pragmatically, we just strip these off. This might not be enough if there are query or fragment parts, but it
            // gets us through the test

            while (result.endsWith("..")) {
                result = result.substring(0, result.length() - 2);
            }
            while (result.endsWith("../")) {
                result = result.substring(0, result.length()-3);
            }

            return new AnyURIValue(result);
        } catch (URISyntaxException err) {
            throw new XPathException("Base URI " + Err.wrap(base) + " is invalid: " + err.getMessage(),
                    "FORG0002", context);
        }
    }

    /**
     * If a system ID can't be parsed as a URL, try to expand it as a relative
     * URI using the current directory as the base URI.
     *
     * @param systemId the supplied systemId. Null is treated as equivalent to ""
     * @return the systemId itself if it is a valid URL; otherwise the result of resolving
     *         the systemId as a relative file name in the current working directory; or if the
     *         current working directory is not available (e.g. in an applet) the supplied systemId
     *         unchanged (except that null is treated as "").
     */

    /*@NotNull*/
    public static String tryToExpand(/*@Nullable*/ String systemId) {
        if (systemId == null) {
            systemId = "";
        }
        try {
            new URL(systemId);
            return systemId;   // all is well
        } catch (MalformedURLException err) {
            String dir;
            try {
                dir = System.getProperty("user.dir");
            } catch (Exception geterr) {
                // this doesn't work when running an applet
                return systemId;
            }
            if (!(dir.endsWith("/") || systemId.startsWith("/"))) {
                dir = dir + '/';
            }

            try {
                URI currentDirectoryURI = new File(dir).toURI();
                URI baseURI = currentDirectoryURI.resolve(systemId);
                return baseURI.toString();
            } catch (Exception e) {
                return systemId;
            }

        }
    }

    /**
     * Construct an absolute URI from a relative URI and a base URI. The method uses the resolve
     * method of the java.net.URI class, except where the base URI uses the (non-standard) "jar:" scheme,
     * in which case the method used is new URL(baseURL, relativeURL).
     * 

*

Spaces in either URI are converted to %20

*

*

If no base URI is available, and the relative URI is not an absolute URI, then the current * directory is used as a base URI.

* * @param relativeURI the relative URI. Null is permitted provided that the base URI is an absolute URI * @param base the base URI. Null is permitted provided that relativeURI is an absolute URI * @return the absolutized URI * @throws java.net.URISyntaxException if either of the strings is not a valid URI or * if the resolution fails */ /*@NotNull*/ public static URI makeAbsolute(/*@Nullable*/ String relativeURI, /*@Nullable*/ String base) throws URISyntaxException { URI absoluteURI; // System.err.println("makeAbsolute " + relativeURI + " against base " + base); if (relativeURI == null) { if (base == null) { throw new URISyntaxException("", "Relative and Base URI must not both be null"); } absoluteURI = new URI(ResolveURI.escapeSpaces(base)); if (!absoluteURI.isAbsolute()) { throw new URISyntaxException(base, "Relative URI not supplied, so base URI must be absolute"); } else { return absoluteURI; } } try { if (base == null || base.isEmpty()) { absoluteURI = new URI(relativeURI); if (!absoluteURI.isAbsolute()) { String expandedBase = ResolveURI.tryToExpand(base); if (!expandedBase.equals(base)) { // prevent infinite recursion return makeAbsolute(relativeURI, expandedBase); } } } else if (base != null && (base.startsWith("jar:") || base.startsWith("file:////"))) { // jar: URIs can't be resolved by the java.net.URI class, because they don't actually // conform with the RFC standards for hierarchic URI schemes (quite apart from not being // a registered URI scheme). But they seem to be widely used. // URIs starting file://// are accepted by the java.net.URI class, they are used to // represent Windows UNC filenames. However, the java.net.URI algorithm for resolving // a relative URI against such a base URI fails to produce a usable UNC filename (it's not // clear whether Java is implementing RFC 3986 correctly here, it depends on interpretation). // So we use the java.net.URL algorithm for this case too, because it works. try { URL baseURL = new URL(base); URL absoluteURL = new URL(baseURL, relativeURI); absoluteURI = absoluteURL.toURI(); } catch (MalformedURLException err) { throw new URISyntaxException(base + " " + relativeURI, err.getMessage()); } } else if (base != null && base.startsWith("classpath:")) { absoluteURI = new URI("classpath:" + relativeURI); } else { URI baseURI; try { baseURI = new URI(base); } catch (URISyntaxException e) { throw new URISyntaxException(base, "Invalid base URI: " + e.getMessage()); } if (baseURI.getFragment() != null) { int hash = base.indexOf('#'); if (hash >= 0) { base = base.substring(0, hash); } try { baseURI = new URI(base); } catch (URISyntaxException e) { throw new URISyntaxException(base, "Invalid base URI: " + e.getMessage()); } } try { new URI(relativeURI); // for validation only } catch (URISyntaxException e) { throw new URISyntaxException(base, "Invalid relative URI: " + e.getMessage()); } absoluteURI = relativeURI.isEmpty() ? baseURI : baseURI.resolve(relativeURI); } } catch (IllegalArgumentException err0) { // can be thrown by resolve() when given a bad URI throw new URISyntaxException(relativeURI, "Cannot resolve URI against base " + Err.wrap(base)); } return absoluteURI; } /** * Replace spaces by %20 * * @param s the input string * @return the input string with each space replaced by %20 */ /*@NotNull*/ public static String escapeSpaces(/*@NotNull*/ String s) { // It's not entirely clear why we have to escape spaces by hand, and not other special characters; // it's just that tests with a variety of filenames show that this approach seems to work. int i = s.indexOf(' '); if (i < 0) { return s; } return (i == 0 ? "" : s.substring(0, i)) + "%20" + (i == s.length() - 1 ? "" : escapeSpaces(s.substring(i + 1))); } /** * Replace %20 by space * * @param uri the input uri * @return the input URI with each %20 replaced by space */ /*@NotNull*/ public static String unescapeSpaces(/*@NotNull*/ String uri) { return uri.replace("%20", " "); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy