// net.sf.saxon.resource.ResourceLoader
// Part of Saxon-HE: The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.resource;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.HashSet;
import java.util.Objects;
import java.util.zip.GZIPInputStream;
import static net.sf.saxon.resource.EncodingDetector.inferStreamEncoding;
/**
* The class provides a static method for loading resources from a URL.
* This method follows HTTP 301 and 302 redirects.
*/
public class ResourceLoader {
    /**
     * The maximum number of redirects to follow before throwing an IOException.
     * If you allow the underlying Java URL class to follow redirects, it gives
     * up after 20 hops.
     */
    public static int MAX_REDIRECTS = 20;

    /**
     * Open a URLConnection to the resource identified by the URI. For HTTP URIs, this
     * method will follow up to MAX_REDIRECTS redirects or until it detects a loop;
     * the connection returned in this case is to the first resource that did not
     * return a 301 or 302 response code.
     *
     * <p>A relative {@code Location} header is resolved against the URL that issued the
     * redirect. A redirect that leaves the http/https schemes is refused.</p>
     *
     * @param url The URL to retrieve.
     * @return A URLConnection to the resource. For HTTP URIs the request has already been
     * sent, because the response code had to be inspected.
     * @throws IOException If more than MAX_REDIRECTS redirects occur, if a loop is detected,
     *                     if a redirect response has no usable {@code Location} header, or
     *                     if a redirect points to a non-HTTP URL.
     */
    public static URLConnection urlConnection(URL url) throws IOException {
        if (!isHttp(url)) {
            return url.openConnection();
        }

        HashSet<String> visited = new HashSet<>();
        String cookies = null;
        int remaining = MAX_REDIRECTS;
        for (;;) {
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            // Redirects are followed by hand so that loops can be detected and cookies carried over.
            conn.setInstanceFollowRedirects(false);
            conn.setRequestProperty("Accept-Encoding", "gzip");
            if (cookies != null) {
                conn.setRequestProperty("Cookie", cookies);
            }

            int status = conn.getResponseCode();
            if (status != HttpURLConnection.HTTP_MOVED_PERM
                    && status != HttpURLConnection.HTTP_MOVED_TEMP) {
                return conn;
            }

            // Read everything needed from this hop, then release it: it is never returned to the caller.
            String location = conn.getHeaderField("Location");
            // NOTE(review): only the first Set-Cookie header is forwarded, verbatim (attributes
            // included), and to whatever host the redirect names - confirm this is intended.
            cookies = conn.getHeaderField("Set-Cookie");
            conn.disconnect();

            if (location == null || location.isEmpty()) {
                throw new IOException("HTTP redirect (status " + status + ") from " + url
                        + " has no Location header");
            }
            // Location may be a relative reference; resolve it against the redirecting URL.
            URL target = new URL(url, location);
            if (!isHttp(target)) {
                // The next iteration would otherwise fail with a ClassCastException.
                throw new IOException("HTTP redirect from " + url
                        + " to non-HTTP location " + location + " refused");
            }

            // The initial URL is deliberately not pre-registered: a bounce through a
            // cookie-setting page and back to the start must remain possible.
            if (!visited.add(target.toExternalForm())) {
                throw new IOException("HTTP redirect loop through " + location);
            }
            remaining -= 1;
            if (remaining < 0) {
                throw new IOException("HTTP redirects more than " + MAX_REDIRECTS + " times");
            }
            url = target;
        }
    }

    /**
     * Open a stream to retrieve the content identified by the URI. For HTTP URIs, this
     * method will follow up to MAX_REDIRECTS redirects or until it detects a loop.
     * This method automatically accepts and decompresses gzip encoded responses.
     *
     * @param url The URL to retrieve.
     * @return An InputStream for the resource content.
     * @throws IOException If more than MAX_REDIRECTS redirects occur, if a loop is detected,
     *                     or if the content stream cannot be opened.
     */
    public static InputStream urlStream(URL url) throws IOException {
        return openContentStream(ResourceLoader.urlConnection(url));
    }

    /**
     * Open a reader to retrieve the content identified by the URI. This handles HTTP
     * redirects in the same way as {@link #urlStream(URL)}, but then it also wraps
     * the stream in a Reader, using the logic prescribed for the {@code fn:unparsed-text}
     * function.
     *
     * @param url               The URL to retrieve.
     * @param requestedEncoding The requested encoding. This is used only as a fallback, following
     *                          the rules of the {@code fn:unparsed-text} specification.
     *                          If null, UTF-8 is assumed.
     * @return A Reader for the resource content.
     * @throws IOException If more than MAX_REDIRECTS redirects occur, if a loop is detected,
     *                     or if the content stream cannot be opened.
     */
    public static Reader urlReader(URL url, String requestedEncoding) throws IOException {
        URLConnection conn = ResourceLoader.urlConnection(url);
        InputStream inputStream = openContentStream(conn);
        // Encoding inference needs to look ahead in the stream and rewind.
        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream);
        }

        // Get any external (HTTP) encoding label.
        String resourceEncoding = null;
        boolean isXmlMediaType = false;
        // The file:// URL scheme gives no useful information...
        if (!"file".equals(url.getProtocol())) {
            // Use the contentType from the HTTP header if available, and parse it
            String contentType = conn.getContentType();
            if (contentType != null) {
                ParsedContentType parsedContentType = new ParsedContentType(contentType);
                isXmlMediaType = parsedContentType.isXmlMediaType;
                resourceEncoding = parsedContentType.encoding;
            }
        }

        try {
            if (requestedEncoding == null) {
                requestedEncoding = "UTF-8";
            }
            // For XML media types the content itself is inspected even when the header names an encoding.
            if (resourceEncoding == null || isXmlMediaType) {
                resourceEncoding = inferStreamEncoding(inputStream, requestedEncoding, null);
            }
        } catch (IOException e) {
            // Deliberate best effort: if inspecting the stream fails, fall back to UTF-8.
            resourceEncoding = "UTF-8";
        }
        assert resourceEncoding != null;

        try {
            return getReaderFromStream(inputStream, resourceEncoding);
        } catch (RuntimeException e) {
            // An illegal or unsupported charset name must not leave the network stream open.
            closeAfterFailure(inputStream, e);
            throw e;
        }
    }

    /**
     * Get a reader corresponding to a binary input stream and an encoding. The mapping is such that
     * any encoding errors that are detected lead to a fatal error, rather than being repaired or ignored
     *
     * @param inputStream      the input stream. Non-null.
     * @param resourceEncoding the encoding. Non-null
     * @return a corresponding reader.
     */
    public static BufferedReader getReaderFromStream(InputStream inputStream, String resourceEncoding) {
        Objects.requireNonNull(inputStream, "inputStream");
        Objects.requireNonNull(resourceEncoding, "resourceEncoding");
        Charset charset = Charset.forName(resourceEncoding);
        // ensure that encoding errors are not recovered
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        return new BufferedReader(new InputStreamReader(inputStream, decoder));
    }

    /** Tests whether the URL uses the http or https scheme. */
    private static boolean isHttp(URL url) {
        String protocol = url.getProtocol();
        return "http".equals(protocol) || "https".equals(protocol);
    }

    /** Tests whether a Content-Encoding value denotes gzip (case-insensitive, including the x-gzip alias). */
    private static boolean isGzipEncoding(String contentEncoding) {
        if (contentEncoding == null) {
            return false;
        }
        String coding = contentEncoding.trim();
        return "gzip".equalsIgnoreCase(coding) || "x-gzip".equalsIgnoreCase(coding);
    }

    /** Opens the connection's input stream, transparently unwrapping a gzip content encoding. */
    private static InputStream openContentStream(URLConnection conn) throws IOException {
        InputStream raw = conn.getInputStream();
        if (!isGzipEncoding(conn.getContentEncoding())) {
            return raw;
        }
        try {
            return new GZIPInputStream(raw);
        } catch (IOException e) {
            // The GZIP header could not be read; release the underlying stream before reporting.
            closeAfterFailure(raw, e);
            throw e;
        }
    }

    /** Closes a stream while another failure is propagating, recording any close error as suppressed. */
    private static void closeAfterFailure(InputStream stream, Throwable cause) {
        try {
            stream.close();
        } catch (IOException closeFailure) {
            cause.addSuppressed(closeFailure);
        }
    }
}