net.sf.saxon.functions.ResolveURI Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.functions;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.lib.StandardURIChecker;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.Err;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.transpile.CSharpReplaceBody;
import net.sf.saxon.value.AnyURIValue;
import net.sf.saxon.value.AtomicValue;
import net.sf.saxon.value.EmptySequence;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
/**
* This class supports the resolve-uri() function in XPath 2.0
*/
public class ResolveURI extends SystemFunction {
/**
 * Evaluate a call on resolve-uri().
 *
 * <p>The first argument is the relative URI reference. If it is an empty sequence the
 * result is an empty sequence. With two arguments, the second argument supplies the base URI;
 * with one argument the base URI is taken from the static context.</p>
 *
 * @param context   the dynamic evaluation context
 * @param arguments the values of the arguments, supplied as Sequences
 * @return the resolved URI, or the empty sequence if the first argument is empty
 * @throws net.sf.saxon.trans.XPathException with code FONS0005 if the single-argument form is used
 *                                           and the static base URI is unknown, or if resolution fails
 */
/*@Nullable*/
@Override
public Sequence call(XPathContext context, Sequence[] arguments) throws XPathException {
    final AtomicValue relativeArg = (AtomicValue) arguments[0].head();
    if (relativeArg == null) {
        return EmptySequence.getInstance();
    }
    final String relative = relativeArg.getStringValue();

    if (getArity() == 2) {
        // Two-argument form: the base URI is supplied explicitly by the caller
        //noinspection ConstantConditions
        return resolve(arguments[1].head().getStringValue(), relative, context);
    }

    // One-argument form: fall back on the base URI from the static context
    final String staticBase = getStaticBaseUriString();
    if (staticBase == null) {
        throw new XPathException("Base URI in static context of resolve-uri() is unknown", "FONS0005", context);
    }
    return resolve(staticBase, relative, context);
}
/**
 * Resolve a relative URI reference against a base URI, following the rules of resolve-uri().
 *
 * <p>If the relative reference is itself an absolute URI it is returned exactly as supplied.
 * Otherwise the base must be a valid, absolute, hierarchic URI (with an exception made for
 * the "jar:" scheme) that has no fragment identifier.</p>
 *
 * @param base     the base URI
 * @param relative the relative URI reference
 * @param context  the dynamic evaluation context, used for error reporting
 * @return the resolved URI
 * @throws XPathException with code FORG0002 if either URI is invalid, if the base URI is
 *                        unsuitable for resolution, or if the resolution itself fails
 */
/*@NotNull*/
private AnyURIValue resolve(String base, String relative, XPathContext context) throws XPathException {
    // Rule 4: "The function resolves the relative IRI reference $relative against the base IRI $base using
    // the algorithm defined in [RFC 3986], adapted by treating any ·character· that would not be valid in
    // an RFC3986 URI or relative reference in the same way that RFC3986 treats unreserved characters.
    // No percent-encoding takes place.

    // We rely on the Java implementation, but the Java implementation will not handle invalid characters
    // notably spaces. If there are spaces present, we escape them to prevent Java objecting, and then unescape
    // them at the end. We accept the consequence that if the input contains both escaped and unescaped spaces,
    // they will all be unescaped at the end.

    // Remember the value as supplied, so that an absolute URI can be returned unchanged
    final String suppliedRelative = relative;

    boolean escaped = false;
    if (relative.contains(" ")) {
        relative = escapeSpaces(relative);
        escaped = true;
    }
    if (base.contains(" ")) {
        base = escapeSpaces(base);
        escaped = true;
    }

    URI relativeURI;
    try {
        relativeURI = absoluteOrRelativeURI(relative);
    } catch (URISyntaxException e) {
        throw new XPathException("Relative URI " + Err.wrap(relative) + " is invalid: " + e.getMessage(),
                                 "FORG0002", context);
    }
    if (relativeURI.isAbsolute()) {
        // An absolute URI is returned unchanged: in particular, any spaces that were escaped
        // above purely to satisfy java.net.URI must not leak into the result.
        return new AnyURIValue(suppliedRelative);
    }

    URI absoluteURI;
    try {
        absoluteURI = new URI(base);
    } catch (URISyntaxException e) {
        throw new XPathException("Base URI " + Err.wrap(base) + " is invalid: " + e.getMessage(),
                                 "FORG0002", context);
    }
    if (!absoluteURI.isAbsolute()) {
        throw new XPathException("Base URI " + Err.wrap(base) + " is not an absolute URI", "FORG0002", context);
    }
    if (absoluteURI.isOpaque() && !base.startsWith("jar:")) {
        // Special-case JAR file URLs, even though non-conformant
        throw new XPathException("Base URI " + Err.wrap(base) + " is a non-hierarchic URI", "FORG0002", context);
    }
    String fragment = absoluteURI.getRawFragment();
    if (fragment != null && !fragment.isEmpty()) {
        throw new XPathException("Base URI " + Err.wrap(base) + " contains a fragment identifier", "FORG0002", context);
    }
    if (!base.startsWith("jar:") && absoluteURI.getPath() != null && absoluteURI.getPath().isEmpty()) {
        // This deals with cases like base=http://www.example.com - changing it to http://www.example.com/
        try {
            absoluteURI = new URI(absoluteURI.getScheme(), absoluteURI.getUserInfo(), absoluteURI.getHost(),
                                  absoluteURI.getPort(), "/", absoluteURI.getQuery(), absoluteURI.getFragment());
        } catch (URISyntaxException e) {
            throw new XPathException("Failed to add a root path to base URI " +
                                     Err.wrap(absoluteURI.toASCIIString()), "FORG0002", context);
        }
        base = absoluteURI.toString();
    }

    URI resolved;
    try {
        resolved = makeAbsolute(relative, base);
    } catch (URISyntaxException e) {
        throw new XPathException(e.getMessage(), "FORG0002", context);
    }
    // URIs starting file://// are left un-normalized
    if (!resolved.toASCIIString().startsWith("file:////")) {
        resolved = resolved.normalize();
    }

    // The spec says that special characters are not escaped. But if the input was percent-escaped,
    // we want the output to be percent-escaped too. Java achieves this automatically, but on C#
    // it needs special attention.
    boolean inputIsPercentEncoded = base.contains("%") || relative.contains("%");
    String resolvedString = inputIsPercentEncoded ? resolved.toASCIIString() : resolved.toString();
    String result = escaped ? unescapeSpaces(resolvedString) : resolvedString;

    // Test case XSLT3 resolve-uri-022. Java even after normalization can leave a URI with trailing "../" or ".." parts.
    // Pragmatically, we just strip these off. This might not be enough if there are query or fragment parts, but it
    // gets us through the test
    while (result.endsWith("..")) {
        result = result.substring(0, result.length() - 2);
    }
    while (result.endsWith("../")) {
        result = result.substring(0, result.length() - 3);
    }
    return new AnyURIValue(result);
}
/**
 * Parse a string as a URI reference, which may be either absolute or relative.
 *
 * @param href the string to be parsed
 * @return the corresponding URI object
 * @throws URISyntaxException if the string cannot be parsed as a URI reference
 */
@CSharpReplaceBody(code="return new System.Uri(href, System.UriKind.RelativeOrAbsolute);")
public static URI absoluteOrRelativeURI(String href) throws URISyntaxException {
    final URI parsed = new URI(href);
    return parsed;
}
/**
 * If a system ID can't be parsed as a URL, try to expand it as a relative
 * URI using the current directory as the base URI.
 *
 * @param systemId the supplied systemId. Null is treated as equivalent to ""
 * @return the systemId itself if it is a valid URL; otherwise the result of resolving
 * the systemId as a relative file name in the current working directory; or if the
 * current working directory is not available (e.g. in an applet) the supplied systemId
 * unchanged (except that null is treated as "").
 */
/*@NotNull*/
public static String tryToExpand(/*@Nullable*/ String systemId) {
    if (systemId == null || systemId.isEmpty()) {
        return resolveAgainstCurrentDirectory("");
    }
    try {
        // The URL object is constructed only to find out whether the string parses as a URL
        new URL(systemId);
        return systemId;   // all is well
    } catch (MalformedURLException err) {
        return resolveAgainstCurrentDirectory(systemId);
    }
}

/**
 * Resolve a system ID against the current working directory, as given by the
 * "user.dir" system property.
 *
 * @param systemId the system ID to be resolved; must not be null
 * @return the resolved URI as a string, or the supplied system ID unchanged if the
 * current directory is unavailable or the resolution fails
 */
private static String resolveAgainstCurrentDirectory(String systemId) {
    String dir;
    try {
        dir = System.getProperty("user.dir");
    } catch (SecurityException geterr) {
        // this doesn't work when running an applet
        return systemId;
    }
    if (dir == null) {
        // The property may be absent: treat this the same as an inaccessible directory
        return systemId;
    }
    if (!(dir.endsWith("/") || systemId.startsWith("/"))) {
        dir = dir + '/';
    }

    try {
        URI currentDirectoryURI = new File(dir).toURI();
        URI baseURI = currentDirectoryURI.resolve(systemId);
        return baseURI.toString();
    } catch (RuntimeException e) {
        // Deliberately best-effort: resolve() rejects strings that are not valid URI
        // references, in which case the system ID is handed back as supplied
        return systemId;
    }
}
/**
 * Construct an absolute URI from a relative URI and a base URI. The method uses the resolve
 * method of the java.net.URI class, except where the base URI uses the (non-standard) "jar:" scheme
 * or starts with "file:////", in which case the method used is new URL(baseURL, relativeURL).
 * Base URIs in the "classpath:" scheme are resolved by temporarily substituting the "file:" scheme.
 *
 * <p>Spaces are converted to %20 only in the base URI, and only when no relative URI is supplied;
 * in all other cases the strings are parsed as supplied.</p>
 *
 * <p>If no base URI is available, and the relative URI is not an absolute URI, then the current
 * directory is used as a base URI.</p>
 *
 * @param relativeURI the relative URI. Null is permitted provided that the base URI is an absolute URI
 * @param base        the base URI. Null is permitted provided that relativeURI is an absolute URI
 * @return the absolutized URI
 * @throws java.net.URISyntaxException if either of the strings is not a valid URI or
 *                                     if the resolution fails
 */
/*@NotNull*/
public static URI makeAbsolute(/*@Nullable*/ String relativeURI, /*@Nullable*/ String base) throws URISyntaxException {
    URI absoluteURI;
    StandardURIChecker checker = StandardURIChecker.getInstance();
    if (relativeURI == null) {
        if (base == null) {
            throw failure("", "Relative and Base URI must not both be null");
        }
        absoluteURI = new URI(ResolveURI.escapeSpaces(base));
        checker.checkThoroughly(absoluteURI);
        if (!absoluteURI.isAbsolute()) {
            throw failure(base, "Relative URI not supplied, so base URI must be absolute");
        } else {
            return absoluteURI;
        }
    }

    if (relativeURI.startsWith("classpath:")) {
        // Resolving a classpath: URI involves searching the classpath.
        // There's no sense in which it makes sense to attempt to make one absolute
        // against some base URI. They're effectively absolute already.
        // (If we don't do this, passing them to java.net.URL causes an exception
        // anyway.)
        return new URI(relativeURI);
    }

    try {
        if (base == null || base.isEmpty()) {
            absoluteURI = new URI(relativeURI);
            if (!absoluteURI.isAbsolute()) {
                String expandedBase = ResolveURI.tryToExpand(base);
                if (!expandedBase.equals(base)) { // prevent infinite recursion
                    return makeAbsolute(relativeURI, expandedBase);
                }
            }
        } else if (base.startsWith("jar:") || base.startsWith("file:////")) {
            // jar: URIs can't be resolved by the java.net.URI class, because they don't actually
            // conform with the RFC standards for hierarchic URI schemes (quite apart from not being
            // a registered URI scheme). But they seem to be widely used.

            // URIs starting file://// are accepted by the java.net.URI class, they are used to
            // represent Windows UNC filenames. However, the java.net.URI algorithm for resolving
            // a relative URI against such a base URI fails to produce a usable UNC filename (it's not
            // clear whether Java is implementing RFC 3986 correctly here, it depends on interpretation).
            // So we use the java.net.URL algorithm for this case too, because it works.
            try {
                URL baseURL = new URL(base);
                URL absoluteURL = new URL(baseURL, relativeURI);
                absoluteURI = absoluteURL.toURI();
            } catch (MalformedURLException err) {
                throw failure(base + " " + relativeURI, err.getMessage());
            }
        } else if (base.startsWith("classpath:")) {
            absoluteURI = new URI(relativeURI);
            if (!absoluteURI.isAbsolute()) {
                // URIs in the classpath: scheme are a bit of a mess. Given "classpath:/path/to/thing",
                // if you attempt to use ClassLoader.getSystemResourceAsStream("/path/to/thing"), it
                // will fail because the leading slash is a problem. Conversely, if you have
                // "classpath:path/to/thing" and you try to resolve "otherthing" against it,
                // you'll get "classpath:otherthing" which is almost certainly wrong. The only
                // way around it seems to be to fake the scheme long enough to get correct
                // resolution.
                String path = base.substring(10);   // strip the "classpath:" prefix
                URI fakeURI;
                if (path.startsWith("/")) {
                    fakeURI = URI.create("file://" + path).resolve(relativeURI);
                } else {
                    fakeURI = URI.create("file:///" + path).resolve(relativeURI);
                }
                String cpath = fakeURI.getPath().substring(1);
                if (cpath.startsWith("../")) {
                    // Converted to a URISyntaxException by the catch clause at the end of the method
                    throw new IllegalArgumentException("Attempt to navigate above root: classpath:" + cpath);
                }
                absoluteURI = URI.create("classpath:" + cpath);
            }
        } else {
            URI baseURI;
            try {
                baseURI = new URI(base);
            } catch (URISyntaxException e) {
                throw failure(base, "Invalid base URI: " + e.getMessage());
            }
            // Any fragment identifier on the base URI is dropped before resolution
            int hash = base.indexOf('#');
            if (hash >= 0) {
                base = base.substring(0, hash);
                try {
                    baseURI = new URI(base);
                    checker.checkThoroughly(baseURI);
                } catch (URISyntaxException e) {
                    throw failure(base, "Invalid base URI: " + e.getMessage());
                }
            }
            URI absOrRel;
            try {
                absOrRel = absoluteOrRelativeURI(relativeURI); // for validation only
                checker.checkThoroughly(absOrRel);
            } catch (URISyntaxException e) {
                // Report the relative URI (not the base) as the offending input
                throw failure(relativeURI, "Invalid relative URI: " + e.getMessage());
            }
            if (absOrRel.isAbsolute()) {
                absoluteURI = absOrRel;
            } else {
                absoluteURI = relativeURI.isEmpty() ? baseURI : baseURI.resolve(relativeURI);
            }
        }
    } catch (IllegalArgumentException err0) {
        // can be thrown by resolve() when given a bad URI
        throw failure(relativeURI, "Cannot resolve URI against base " + Err.wrap(base));
    }

    return absoluteURI;
}
/**
 * Create (but do not throw) the exception used to report a URI that cannot be
 * parsed or resolved.
 *
 * @param input  the string that caused the failure
 * @param reason an explanation of what went wrong
 * @return the exception, ready for the caller to throw
 */
@CSharpReplaceBody(code="return new System.UriFormatException(\"Failed to resolve \" + input + \": \" + reason);")
private static URISyntaxException failure(String input, String reason) {
    final URISyntaxException problem = new URISyntaxException(input, reason);
    return problem;
}
/**
 * Replace spaces by %20
 *
 * @param s the input string
 * @return the input string with each space replaced by %20; the input string itself
 * is returned if it contains no spaces
 */
/*@NotNull*/
public static String escapeSpaces(/*@NotNull*/ String s) {
    // It's not entirely clear why we have to escape spaces by hand, and not other special characters;
    // it's just that tests with a variety of filenames show that this approach seems to work.
    if (s.indexOf(' ') < 0) {
        return s;
    }
    // A single library call: runs in one pass and uses constant stack depth however
    // many spaces the input contains
    return s.replace(" ", "%20");
}
/**
 * Reverse the effect of space-escaping: every occurrence of the three-character
 * sequence %20 is turned back into a single space.
 *
 * @param uri the input uri
 * @return the input URI with each %20 replaced by space
 */
/*@NotNull*/
public static String unescapeSpaces(/*@NotNull*/ String uri) {
    final String escapedSpace = "%20";
    final String space = " ";
    return uri.replace(escapedSpace, space);
}
}