// org.jsoup.helper.HttpConnection Maven / Gradle / Ivy
// The newest version!
package org.jsoup.helper;
import org.jsoup.Connection;
import org.jsoup.HttpStatusException;
import org.jsoup.Progress;
import org.jsoup.UncheckedIOException;
import org.jsoup.UnsupportedMimeTypeException;
import org.jsoup.internal.ControllableInputStream;
import org.jsoup.internal.Functions;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.parser.StreamParser;
import org.jsoup.parser.TokenQueue;
import org.jspecify.annotations.Nullable;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSocketFactory;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.CookieManager;
import java.net.CookieStore;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import static org.jsoup.Connection.Method.HEAD;
import static org.jsoup.helper.DataUtil.UTF_8;
import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.SharedConstants.DefaultBufferSize;
/**
* Implementation of {@link Connection}.
* @see org.jsoup.Jsoup#connect(String)
*/
@SuppressWarnings("CharsetObjectCanBeUsed")
public class HttpConnection implements Connection {
/** Name of the HTTP header that declares the body's content coding; responses are checked for gzip and deflate. */
public static final String CONTENT_ENCODING = "Content-Encoding";
/**
* Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
* vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
*/
public static final String DEFAULT_UA =
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36";
/** Name of the HTTP User-Agent request header. */
private static final String USER_AGENT = "User-Agent";
/** Name of the HTTP Content-Type header. */
public static final String CONTENT_TYPE = "Content-Type";
/** Content type identifying a multipart form submission. */
public static final String MULTIPART_FORM_DATA = "multipart/form-data";
/** Content type identifying a URL-encoded form submission. */
public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded";
private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set.
// NOTE(review): presumably the fallback content type for uploaded data; its use is not visible in this part of the file.
private static final String DefaultUploadType = "application/octet-stream";
/** Charset used to recover the raw bytes of a response header value before testing whether they are really UTF-8. */
private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
/**
 * Builds a fresh {@link Connection} whose request targets the supplied URL string.
 * @param url the URL to fetch from
 * @return a new Connection object
 */
public static Connection connect(String url) {
    final Connection connection = new HttpConnection();
    connection.url(url);
    return connection;
}
/**
 * Builds a fresh {@link Connection} whose request targets the supplied URL.
 * @param url the URL to fetch from
 * @return a new Connection object
 */
public static Connection connect(URL url) {
    final Connection connection = new HttpConnection();
    connection.url(url);
    return connection;
}
/**
 * Creates an empty HttpConnection backed by a new default {@link Request}.
 */
public HttpConnection() {
    this.req = new Request();
}
/**
 * Creates a connection whose request is a copy of an existing Request. The data and body of the original are
 * not carried over; all other settings (proxy, parser, cookies, etc) are.
 * @param copy the request to copy
 */
HttpConnection(Request copy) {
    this.req = new Request(copy);
}
/**
 * Escapes a name for use inside a quoted MIME parameter by percent-encoding every double quote.
 * @param val the raw name
 * @return the name with each {@code "} replaced by {@code %22}
 */
private static String encodeMimeName(String val) {
    final String quote = "\"";
    final String encodedQuote = "%22";
    return val.replace(quote, encodedQuote);
}
// the request this connection configures and executes; replaced wholesale by request(Connection.Request)
private HttpConnection.Request req;
// the most recent response; null until execute() has run or response(...) has been called
private Connection.@Nullable Response res;
/**
 * Creates a new connection whose request is copied from this connection's request, so that the different
 * settings, cookie manager, etc carry over.
 * @return the new connection
 */
@Override
public Connection newRequest() {
    final Request prototype = req;
    return new HttpConnection(prototype);
}
/**
 * Creates a Connection that simply wraps an already existing request / response pair.
 * @param req the request to hold
 * @param res the response to hold
 */
private HttpConnection(Request req, Response res) {
    this.res = res;
    this.req = req;
}
/**
 * Sets the URL the request will be sent to.
 * @param url the target URL
 * @return this connection, for chaining
 */
@Override
public Connection url(URL url) {
    this.req.url(url);
    return this;
}
/**
 * Parses the supplied string as a URL and sets it as the request target.
 * @param url the target URL; must not be empty
 * @return this connection, for chaining
 * @throws IllegalArgumentException if the string cannot be parsed as a URL
 */
@Override
public Connection url(String url) {
    Validate.notEmptyParam(url, "url");
    final URL parsed;
    try {
        parsed = new URL(url);
    } catch (MalformedURLException e) {
        final String message = String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url);
        throw new IllegalArgumentException(message, e);
    }
    req.url(parsed);
    return this;
}
/**
 * Sets the proxy to route the request through.
 * @param proxy the proxy, or null
 * @return this connection, for chaining
 */
@Override
public Connection proxy(@Nullable Proxy proxy) {
    this.req.proxy(proxy);
    return this;
}
/**
 * Sets the proxy to route the request through, by host and port.
 * @param host the proxy host
 * @param port the proxy port
 * @return this connection, for chaining
 */
@Override
public Connection proxy(String host, int port) {
    this.req.proxy(host, port);
    return this;
}
/**
 * Sets the User-Agent request header, replacing any existing value.
 * @param userAgent the user agent string; must not be null
 * @return this connection, for chaining
 */
@Override
public Connection userAgent(String userAgent) {
    Validate.notNullParam(userAgent, "userAgent");
    this.req.header(USER_AGENT, userAgent);
    return this;
}
/**
 * Sets the request timeout.
 * @param millis the timeout in milliseconds
 * @return this connection, for chaining
 */
@Override
public Connection timeout(int millis) {
    this.req.timeout(millis);
    return this;
}
/**
 * Sets the maximum number of response body bytes to read.
 * @param bytes the limit in bytes
 * @return this connection, for chaining
 */
@Override
public Connection maxBodySize(int bytes) {
    this.req.maxBodySize(bytes);
    return this;
}
/**
 * Configures whether redirects should be followed.
 * @param followRedirects true to follow redirects
 * @return this connection, for chaining
 */
@Override
public Connection followRedirects(boolean followRedirects) {
    this.req.followRedirects(followRedirects);
    return this;
}
/**
 * Sets the {@code Referer} request header, replacing any existing value.
 * @param referrer the referrer to send; must not be null
 * @return this connection, for chaining
 */
@Override
public Connection referrer(String referrer) {
    Validate.notNullParam(referrer, "referrer");
    this.req.header("Referer", referrer);
    return this;
}
/**
 * Sets the HTTP method of the request.
 * @param method the method to use
 * @return this connection, for chaining
 */
@Override
public Connection method(Method method) {
    this.req.method(method);
    return this;
}
/**
 * Configures whether HTTP error statuses should be tolerated rather than raised as exceptions.
 * @param ignoreHttpErrors true to ignore HTTP errors
 * @return this connection, for chaining
 */
@Override
public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
    this.req.ignoreHttpErrors(ignoreHttpErrors);
    return this;
}
/**
 * Configures whether the response content type check should be skipped.
 * @param ignoreContentType true to accept any content type
 * @return this connection, for chaining
 */
@Override
public Connection ignoreContentType(boolean ignoreContentType) {
    this.req.ignoreContentType(ignoreContentType);
    return this;
}
/**
 * Adds a key / value data parameter to the request.
 * @param key the parameter key
 * @param value the parameter value
 * @return this connection, for chaining
 */
@Override
public Connection data(String key, String value) {
    final KeyVal pair = KeyVal.create(key, value);
    req.data(pair);
    return this;
}
/**
 * Sets a custom SSL socket factory to be applied to HTTPS connections.
 * @param sslSocketFactory the factory to use
 * @return this connection, for chaining
 */
@Override
public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
    this.req.sslSocketFactory(sslSocketFactory);
    return this;
}
/**
 * Adds an input-stream backed data parameter (such as a file upload) to the request.
 * @param key the parameter key
 * @param filename the file name to present
 * @param inputStream the stream supplying the content
 * @return this connection, for chaining
 */
@Override
public Connection data(String key, String filename, InputStream inputStream) {
    final KeyVal upload = KeyVal.create(key, filename, inputStream);
    req.data(upload);
    return this;
}
/**
 * Adds an input-stream backed data parameter with an explicit content type to the request.
 * @param key the parameter key
 * @param filename the file name to present
 * @param inputStream the stream supplying the content
 * @param contentType the content type to declare for this part
 * @return this connection, for chaining
 */
@Override
public Connection data(String key, String filename, InputStream inputStream, String contentType) {
    this.req.data(
        KeyVal.create(key, filename, inputStream).contentType(contentType));
    return this;
}
/**
 * Adds every entry of the map to the request as a key / value data parameter.
 * @param data the parameters to add; must not be null
 * @return this connection, for chaining
 */
@Override
public Connection data(Map<String, String> data) {
    Validate.notNullParam(data, "data");
    // type arguments restored: with a raw Map the entry loop below does not compile
    for (Map.Entry<String, String> entry : data.entrySet()) {
        req.data(KeyVal.create(entry.getKey(), entry.getValue()));
    }
    return this;
}
/**
 * Adds data parameters supplied as a flat sequence of alternating keys and values.
 * @param keyvals key, value, key, value, ...; must contain an even number of elements
 * @return this connection, for chaining
 */
@Override
public Connection data(String... keyvals) {
    Validate.notNullParam(keyvals, "keyvals");
    final int count = keyvals.length;
    Validate.isTrue(count % 2 == 0, "Must supply an even number of key value pairs");
    int index = 0;
    while (index < count) {
        final String key = keyvals[index++];
        final String value = keyvals[index++];
        Validate.notEmpty(key, "Data key must not be empty");
        Validate.notNull(value, "Data value must not be null");
        req.data(KeyVal.create(key, value));
    }
    return this;
}
/**
 * Adds every supplied key / value object to the request's data parameters.
 * @param data the parameters to add; must not be null
 * @return this connection, for chaining
 */
@Override
public Connection data(Collection<Connection.KeyVal> data) {
    Validate.notNullParam(data, "data");
    // type argument restored: a raw Collection cannot be iterated as Connection.KeyVal
    for (Connection.KeyVal entry : data) {
        req.data(entry);
    }
    return this;
}
/**
 * Finds the first data parameter with the given key.
 * @param key the key to look for; must not be empty
 * @return the first matching key / value, or null if none matches
 */
@Override
public Connection.KeyVal data(String key) {
    Validate.notEmptyParam(key, "key");
    Connection.KeyVal found = null;
    for (Connection.KeyVal candidate : request().data()) {
        if (candidate.key().equals(key)) {
            found = candidate;
            break;
        }
    }
    return found;
}
/**
 * Sets the raw request body.
 * @param body the body to send
 * @return this connection, for chaining
 */
@Override
public Connection requestBody(String body) {
    this.req.requestBody(body);
    return this;
}
/**
 * Sets a request header, replacing any existing value of that header.
 * @param name the header name
 * @param value the header value
 * @return this connection, for chaining
 */
@Override
public Connection header(String name, String value) {
    this.req.header(name, value);
    return this;
}
/**
 * Sets each entry of the map as a request header, replacing any existing value of the same header.
 * @param headers header names mapped to values; must not be null
 * @return this connection, for chaining
 */
@Override
public Connection headers(Map<String, String> headers) {
    Validate.notNullParam(headers, "headers");
    // type arguments restored: with a raw Map the entry loop below does not compile
    for (Map.Entry<String, String> entry : headers.entrySet()) {
        req.header(entry.getKey(), entry.getValue());
    }
    return this;
}
/**
 * Sets a cookie to be sent with the request.
 * @param name the cookie name
 * @param value the cookie value
 * @return this connection, for chaining
 */
@Override
public Connection cookie(String name, String value) {
    this.req.cookie(name, value);
    return this;
}
/**
 * Sets each entry of the map as a cookie to be sent with the request.
 * @param cookies cookie names mapped to values; must not be null
 * @return this connection, for chaining
 */
@Override
public Connection cookies(Map<String, String> cookies) {
    Validate.notNullParam(cookies, "cookies");
    // type arguments restored: with a raw Map the entry loop below does not compile
    for (Map.Entry<String, String> entry : cookies.entrySet()) {
        req.cookie(entry.getKey(), entry.getValue());
    }
    return this;
}
/**
 * Replaces the request's cookie manager with a new one backed by the supplied store.
 * @param cookieStore the store to hold cookies in
 * @return this connection, for chaining
 */
@Override
public Connection cookieStore(CookieStore cookieStore) {
    // a null policy is passed through to the CookieManager constructor
    final CookieManager manager = new CookieManager(cookieStore, null);
    req.cookieManager = manager;
    return this;
}
/**
 * Gets the cookie store held by the request's cookie manager.
 * @return the cookie store
 */
@Override
public CookieStore cookieStore() {
    final CookieManager manager = req.cookieManager;
    return manager.getCookieStore();
}
/**
 * Sets the parser to use when parsing the response into a Document.
 * @param parser the parser to use
 * @return this connection, for chaining
 */
@Override
public Connection parser(Parser parser) {
    this.req.parser(parser);
    return this;
}
/**
 * Executes the request as a GET and parses the response.
 * @return the parsed document
 * @throws IOException on error executing the request or parsing the response
 */
@Override
public Document get() throws IOException {
    this.req.method(Method.GET);
    this.execute();
    Validate.notNull(this.res);
    return this.res.parse();
}
/**
 * Executes the request as a POST and parses the response.
 * @return the parsed document
 * @throws IOException on error executing the request or parsing the response
 */
@Override
public Document post() throws IOException {
    this.req.method(Method.POST);
    this.execute();
    Validate.notNull(this.res);
    return this.res.parse();
}
/**
 * Executes the current request and stores the resulting response on this connection.
 * @return the response
 * @throws IOException on error executing the request
 */
@Override
public Connection.Response execute() throws IOException {
    this.res = Response.execute(this.req);
    return this.res;
}
/**
 * Gets the request object held by this connection.
 * @return the request
 */
@Override
public Connection.Request request() {
    return this.req;
}
/**
 * Replaces the request object held by this connection.
 * @param request the new request; must be an {@link HttpConnection.Request}
 * @return this connection, for chaining
 * @throws ClassCastException if the request is some other implementation of Connection.Request
 */
@Override
public Connection request(Connection.Request request) {
    // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
    final HttpConnection.Request typed = (HttpConnection.Request) request;
    this.req = typed;
    return this;
}
/**
 * Gets the response held by this connection.
 * @return the response
 * @throws IllegalArgumentException if no response is available yet
 */
@Override
public Connection.Response response() {
    if (res != null) {
        return res;
    }
    throw new IllegalArgumentException("You must execute the request before getting a response.");
}
/**
 * Replaces the response held by this connection.
 * @param response the response to hold
 * @return this connection, for chaining
 */
@Override
public Connection response(Connection.Response response) {
    this.res = response;
    return this;
}
/**
 * Sets the name of the charset used for posted data.
 * @param charset the charset name
 * @return this connection, for chaining
 */
@Override
public Connection postDataCharset(String charset) {
    this.req.postDataCharset(charset);
    return this;
}
/**
 * Sets the authenticator to use for the request.
 * @param authenticator the authenticator
 * @return this connection, for chaining
 */
@Override public Connection auth(RequestAuthenticator authenticator) {
    this.req.auth(authenticator);
    return this;
}
/**
 * Registers a progress handler on the request, to be attached to the response body stream when it is read.
 * @param handler the progress callback
 * @return this connection, for chaining
 */
@Override public Connection onResponseProgress(Progress<Connection.Response> handler) {
    // type argument restored: the parameter was left as a raw Progress
    req.responseProgress = handler;
    return this;
}
@SuppressWarnings("unchecked")
private static abstract class Base> implements Connection.Base {
private static final URL UnsetUrl; // only used if you created a new Request()
static {
try {
UnsetUrl = new URL("http://undefined/");
} catch (MalformedURLException e) {
throw new IllegalStateException(e);
}
}
URL url = UnsetUrl;
Method method = Method.GET;
Map> headers;
Map cookies;
private Base() {
headers = new LinkedHashMap<>();
cookies = new LinkedHashMap<>();
}
private Base(Base copy) {
url = copy.url; // unmodifiable object
method = copy.method;
headers = new LinkedHashMap<>();
for (Map.Entry> entry : copy.headers.entrySet()) {
headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
}
cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
}
@Override
public URL url() {
if (url == UnsetUrl)
throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
return url;
}
@Override
public T url(URL url) {
Validate.notNullParam(url, "url");
this.url = new UrlBuilder(url).build();
return (T) this;
}
@Override
public Method method() {
return method;
}
@Override
public T method(Method method) {
Validate.notNullParam(method, "method");
this.method = method;
return (T) this;
}
@Override
public String header(String name) {
Validate.notNullParam(name, "name");
List vals = getHeadersCaseInsensitive(name);
if (vals.size() > 0) {
// https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
return StringUtil.join(vals, ", ");
}
return null;
}
@Override
public T addHeader(String name, @Nullable String value) {
Validate.notEmptyParam(name, "name");
value = value == null ? "" : value;
List values = headers(name);
if (values.isEmpty()) {
values = new ArrayList<>();
headers.put(name, values);
}
values.add(value);
return (T) this;
}
@Override
public List headers(String name) {
Validate.notEmptyParam(name, "name");
return getHeadersCaseInsensitive(name);
}
@Override
public T header(String name, String value) {
Validate.notEmptyParam(name, "name");
removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
addHeader(name, value);
return (T) this;
}
@Override
public boolean hasHeader(String name) {
Validate.notEmptyParam(name, "name");
return !getHeadersCaseInsensitive(name).isEmpty();
}
/**
* Test if the request has a header with this value (case insensitive).
*/
@Override
public boolean hasHeaderWithValue(String name, String value) {
Validate.notEmpty(name);
Validate.notEmpty(value);
List values = headers(name);
for (String candidate : values) {
if (value.equalsIgnoreCase(candidate))
return true;
}
return false;
}
@Override
public T removeHeader(String name) {
Validate.notEmptyParam(name, "name");
Map.Entry> entry = scanHeaders(name); // remove is case-insensitive too
if (entry != null)
headers.remove(entry.getKey()); // ensures correct case
return (T) this;
}
@Override
public Map headers() {
LinkedHashMap map = new LinkedHashMap<>(headers.size());
for (Map.Entry> entry : headers.entrySet()) {
String header = entry.getKey();
List values = entry.getValue();
if (values.size() > 0)
map.put(header, values.get(0));
}
return map;
}
@Override
public Map> multiHeaders() {
return headers;
}
private List getHeadersCaseInsensitive(String name) {
Validate.notNull(name);
for (Map.Entry> entry : headers.entrySet()) {
if (name.equalsIgnoreCase(entry.getKey()))
return entry.getValue();
}
return Collections.emptyList();
}
private Map.@Nullable Entry> scanHeaders(String name) {
String lc = lowerCase(name);
for (Map.Entry> entry : headers.entrySet()) {
if (lowerCase(entry.getKey()).equals(lc))
return entry;
}
return null;
}
@Override
public String cookie(String name) {
Validate.notEmptyParam(name, "name");
return cookies.get(name);
}
@Override
public T cookie(String name, String value) {
Validate.notEmptyParam(name, "name");
Validate.notNullParam(value, "value");
cookies.put(name, value);
return (T) this;
}
@Override
public boolean hasCookie(String name) {
Validate.notEmptyParam(name, "name");
return cookies.containsKey(name);
}
@Override
public T removeCookie(String name) {
Validate.notEmptyParam(name, "name");
cookies.remove(name);
return (T) this;
}
@Override
public Map cookies() {
return cookies;
}
}
public static class Request extends HttpConnection.Base implements Connection.Request {
static {
System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
// make sure that we can send Sec-Fetch-Site headers etc.
}
private @Nullable Proxy proxy;
private int timeoutMilliseconds;
private int maxBodySizeBytes;
private boolean followRedirects;
private final Collection data;
private @Nullable String body = null;
private boolean ignoreHttpErrors = false;
private boolean ignoreContentType = false;
private Parser parser;
private boolean parserDefined = false; // called parser(...) vs initialized in ctor
private String postDataCharset = DataUtil.defaultCharsetName;
private @Nullable SSLSocketFactory sslSocketFactory;
private CookieManager cookieManager;
private @Nullable RequestAuthenticator authenticator;
private @Nullable Progress responseProgress;
private volatile boolean executing = false;
Request() {
super();
timeoutMilliseconds = 30000; // 30 seconds
maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
followRedirects = true;
data = new ArrayList<>();
method = Method.GET;
addHeader("Accept-Encoding", "gzip");
addHeader(USER_AGENT, DEFAULT_UA);
parser = Parser.htmlParser();
cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
}
Request(Request copy) {
super(copy);
proxy = copy.proxy;
postDataCharset = copy.postDataCharset;
timeoutMilliseconds = copy.timeoutMilliseconds;
maxBodySizeBytes = copy.maxBodySizeBytes;
followRedirects = copy.followRedirects;
data = new ArrayList<>(); // data not copied
//body not copied
ignoreHttpErrors = copy.ignoreHttpErrors;
ignoreContentType = copy.ignoreContentType;
parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
parserDefined = copy.parserDefined;
sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
cookieManager = copy.cookieManager;
authenticator = copy.authenticator;
responseProgress = copy.responseProgress;
executing = false;
}
@Override
public Proxy proxy() {
return proxy;
}
@Override
public Request proxy(@Nullable Proxy proxy) {
this.proxy = proxy;
return this;
}
@Override
public Request proxy(String host, int port) {
this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
return this;
}
@Override
public int timeout() {
return timeoutMilliseconds;
}
@Override
public Request timeout(int millis) {
Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
timeoutMilliseconds = millis;
return this;
}
@Override
public int maxBodySize() {
return maxBodySizeBytes;
}
@Override
public Connection.Request maxBodySize(int bytes) {
Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
maxBodySizeBytes = bytes;
return this;
}
@Override
public boolean followRedirects() {
return followRedirects;
}
@Override
public Connection.Request followRedirects(boolean followRedirects) {
this.followRedirects = followRedirects;
return this;
}
@Override
public boolean ignoreHttpErrors() {
return ignoreHttpErrors;
}
@Override
public SSLSocketFactory sslSocketFactory() {
return sslSocketFactory;
}
@Override
public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
this.sslSocketFactory = sslSocketFactory;
}
@Override
public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
this.ignoreHttpErrors = ignoreHttpErrors;
return this;
}
@Override
public boolean ignoreContentType() {
return ignoreContentType;
}
@Override
public Connection.Request ignoreContentType(boolean ignoreContentType) {
this.ignoreContentType = ignoreContentType;
return this;
}
@Override
public Request data(Connection.KeyVal keyval) {
Validate.notNullParam(keyval, "keyval");
data.add(keyval);
return this;
}
@Override
public Collection data() {
return data;
}
@Override
public Connection.Request requestBody(@Nullable String body) {
this.body = body;
return this;
}
@Override
public String requestBody() {
return body;
}
@Override
public Request parser(Parser parser) {
this.parser = parser;
parserDefined = true;
return this;
}
@Override
public Parser parser() {
return parser;
}
@Override
public Connection.Request postDataCharset(String charset) {
Validate.notNullParam(charset, "charset");
if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
this.postDataCharset = charset;
return this;
}
@Override
public String postDataCharset() {
return postDataCharset;
}
CookieManager cookieManager() {
return cookieManager;
}
@Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
this.authenticator = authenticator;
return this;
}
@Override @Nullable public RequestAuthenticator auth() {
return authenticator;
}
}
public static class Response extends HttpConnection.Base implements Connection.Response {
// upper bound on chained redirects; reaching it fails the request with an IOException
private static final int MAX_REDIRECTS = 20;
// name of the response header that triggers a redirect when present
private static final String LOCATION = "Location";
// status code and message as reported by the underlying connection
private final int statusCode;
private final String statusMessage;
// the fully buffered body, once read (or an empty buffer when there is no body to read)
private @Nullable ByteBuffer byteData;
// the not-yet-consumed body stream; set to null when closed
private @Nullable ControllableInputStream bodyStream;
// the underlying connection; set to null once disconnected
private @Nullable HttpURLConnection conn;
// charset name: from the content type header on execute, and updated from the document on parse
private @Nullable String charset;
private @Nullable final String contentType;
// true once execute has completed for this response
private boolean executed = false;
// true once the body stream has been handed out or consumed, to prevent reading it twice
private boolean inputStreamRead = false;
// how many redirects preceded this response
private int numRedirects = 0;
// the request that produced this response
private final HttpConnection.Request req;
/*
* Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc)
*/
private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*");
/**
 * Internal only! Builds a placeholder HttpConnection.Response that was never sent, which is useful for testing.
 * Real responses are created from the HttpURLConnection, with the fields defined from it.
 */
Response() {
    super();
    this.req = new Request();
    this.contentType = null;
    this.statusMessage = "Request not made";
    this.statusCode = 400;
}
/**
 * Executes the request as an initial request, i.e. one with no preceding redirect response.
 * @param req the request to execute
 * @return the response
 * @throws IOException on error executing the request
 */
static Response execute(HttpConnection.Request req) throws IOException {
    final Response noPreviousResponse = null;
    return execute(req, noPreviousResponse);
}
/**
 * Executes the request: opens the connection, writes any request body, follows redirects (by re-executing
 * with the redirect target), validates the status and content type, and prepares the response body stream.
 * The body itself is not read here.
 *
 * @param req the request to execute; must not be null, and must not already be executing
 * @param previousResponse the response that redirected to this request, or null for an initial request
 * @return the executed response
 * @throws HttpStatusException if the status is an error and HTTP errors are not ignored
 * @throws UnsupportedMimeTypeException if the content type is not handled and content types are not ignored
 * @throws IOException on any other error connecting or reading
 */
static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException {
    // validate before locking: synchronizing on a null request would throw a bare NullPointerException
    Validate.notNullParam(req, "req");
    synchronized (req) {
        Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads.");
        req.executing = true;
    }

    String mimeBoundary = null;
    long startTime;
    HttpURLConnection conn;
    try {
        URL url = req.url();
        Validate.notNull(url, "URL must be specified to connect");
        String protocol = url.getProtocol();
        if (!protocol.equals("http") && !protocol.equals("https"))
            throw new MalformedURLException("Only http & https protocols supported");
        final boolean methodHasBody = req.method().hasBody();
        final boolean hasRequestBody = req.requestBody() != null;
        if (!methodHasBody)
            Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method());

        // set up the request for execution
        if (req.data().size() > 0 && (!methodHasBody || hasRequestBody))
            serialiseRequestUrl(req);
        else if (methodHasBody)
            mimeBoundary = setOutputContentType(req);

        startTime = System.nanoTime();
        conn = createConnection(req);
    } catch (IOException | RuntimeException e) {
        // setup failed before the main try/finally below: release the flag so the request can be executed again
        req.executing = false;
        throw e;
    }

    Response res = null;
    try {
        conn.connect();
        if (conn.getDoOutput()) {
            OutputStream out = conn.getOutputStream();
            try { writePost(req, out, mimeBoundary); }
            catch (IOException e) { conn.disconnect(); throw e; }
            finally { out.close(); }
        }

        int status = conn.getResponseCode();
        res = new Response(conn, req, previousResponse);

        // redirect if there's a location header (from 3xx, or 201 etc)
        if (res.hasHeader(LOCATION) && req.followRedirects()) {
            if (status != HTTP_TEMP_REDIR) {
                req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
                req.data().clear();
                req.requestBody(null);
                req.removeHeader(CONTENT_TYPE);
            }

            String location = res.header(LOCATION);
            Validate.notNull(location);
            // fix broken Location: http:/temp/AAG_New/en/index.php (length guarded so a bare "http:/" cannot overrun)
            if (location.startsWith("http:/") && location.length() > 6 && location.charAt(6) != '/')
                location = location.substring(6);
            URL redir = StringUtil.resolve(req.url(), location);
            req.url(redir);

            req.executing = false; // the recursive call below re-acquires the flag
            return execute(req, res);
        }
        if ((status < 200 || status >= 400) && !req.ignoreHttpErrors())
            throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString());

        // check that we can handle the returned content type; if not, abort before fetching it
        String contentType = res.contentType();
        if (contentType != null
            && !req.ignoreContentType()
            && !contentType.startsWith("text/")
            && !xmlContentTypeRxp.matcher(contentType).matches()
        )
            throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml",
                contentType, req.url().toString());

        // switch to the XML parser if content type is xml and the parser was not explicitly set
        if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) {
            if (!req.parserDefined) req.parser(Parser.xmlParser());
        }

        res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
        if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
            InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
            if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
                stream = new GZIPInputStream(stream);
            else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate"))
                stream = new InflaterInputStream(stream, new Inflater(true));

            res.bodyStream = ControllableInputStream.wrap(
                stream, DefaultBufferSize, req.maxBodySize())
                .timeout(startTime, req.timeout());

            if (req.responseProgress != null) // set response progress listener
                res.bodyStream.onProgress(conn.getContentLength(), req.responseProgress, res);
        } else {
            res.byteData = DataUtil.emptyByteBuffer();
        }
    } catch (IOException e) {
        if (res != null) res.safeClose(); // will be non-null if got to conn
        else conn.disconnect(); // failed before a Response took ownership of the connection
        throw e;
    } finally {
        req.executing = false;

        // detach any thread local auth delegate
        if (req.authenticator != null)
            AuthenticationHandler.handler.remove();
    }

    res.executed = true;
    return res;
}
/**
 * Gets the HTTP status code of the response.
 * @return the status code
 */
@Override
public int statusCode() {
    return this.statusCode;
}
/**
 * Gets the HTTP status message of the response.
 * @return the status message
 */
@Override
public String statusMessage() {
    return this.statusMessage;
}
/**
 * Gets the name of the response charset.
 * @return the charset name; may be null if none has been determined
 */
@Override
public String charset() {
    return this.charset;
}
/**
 * Overrides the name of the charset used to decode the response.
 * @param charset the charset name
 * @return this response, for chaining
 */
@Override
public Response charset(String charset) {
    this.charset = charset;
    return this;
}
/**
 * Gets the response content type.
 * @return the content type; may be null
 */
@Override
public String contentType() {
    return this.contentType;
}
/**
 * Called from parse() or streamParser(): checks the response state and selects the stream to parse from. If
 * the body has already been buffered, a new stream over the buffered bytes is used, which allows re-parsing.
 * @return the stream to parse
 */
private ControllableInputStream prepareParse() {
    Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
    final ControllableInputStream source;
    if (byteData == null) {
        source = bodyStream;
    } else { // bytes have been read in to the buffer, parse that
        final ByteArrayInputStream buffered = new ByteArrayInputStream(byteData.array(), 0, byteData.limit());
        source = ControllableInputStream.wrap(buffered, 0); // no max
        inputStreamRead = false; // ok to reparse if in bytes
    }
    Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
    Validate.notNull(source);
    inputStreamRead = true;
    return source;
}
/**
 * Reads and parses the response body into a Document, then closes the body stream and the connection. The
 * response charset is updated from the parsed document's output settings.
 * @return the parsed document, with its connection set to wrap this request and response
 * @throws IOException on error reading or parsing the body
 */
@Override public Document parse() throws IOException {
    ControllableInputStream stream = prepareParse();
    try {
        Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser());
        doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req?
        charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
        return doc;
    } finally {
        // close in all cases, so that a failed parse does not leave the stream and connection open
        safeClose();
    }
}
/**
 * Sets up a StreamParser over the response body, after detecting the charset. On success the stream is left
 * open: the caller must close the returned StreamParser to close it.
 * @return the initialized stream parser, with its document's connection set to wrap this request and response
 * @throws IOException on error detecting the charset or initializing the parse
 */
@Override public StreamParser streamParser() throws IOException {
    ControllableInputStream stream = prepareParse();
    try {
        String baseUri = url.toExternalForm();
        DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser());
        // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit.

        // set up the stream parser and rig this connection up to the parsed doc:
        StreamParser streamer = new StreamParser(req.parser());
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset));
        streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it
        streamer.document().connection(new HttpConnection(req, this));
        charset = charsetDoc.charset.name();

        // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream
        return streamer;
    } catch (IOException | RuntimeException e) {
        // no parser is handed to the caller on failure, so nobody else could close the stream
        safeClose();
        throw e;
    }
}
/**
 * Ensures the response body has been read fully into the byte buffer. Does nothing if there is no body stream
 * or the body is already buffered. After reading, the stream and connection are closed.
 * @throws UncheckedIOException if reading the body fails
 */
private void prepareByteData() {
    Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
    final boolean needsBuffering = bodyStream != null && byteData == null;
    if (!needsBuffering) {
        return;
    }
    Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())");
    try {
        byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize());
    } catch (IOException ioFailure) {
        throw new UncheckedIOException(ioFailure);
    } finally {
        inputStreamRead = true;
        safeClose();
    }
}
/**
 * Gets the response body as a string, buffering it first if required. Decodes with the response charset, or
 * UTF-8 if no charset is set.
 * @return the decoded body
 */
@Override
public String body() {
    prepareByteData();
    Validate.notNull(byteData);
    // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
    final Charset decoder = charset == null ? UTF_8 : Charset.forName(charset);
    final String text = decoder.decode(byteData).toString();
    ((Buffer) byteData).rewind(); // cast to avoid covariant return type change in jdk9
    return text;
}
/**
 * Gets the response body as a byte array, buffering it first if required.
 * @return the buffer's backing array when it holds exactly the body, otherwise a copy trimmed to size
 */
@Override
public byte[] bodyAsBytes() {
    prepareByteData();
    Validate.notNull(byteData);
    Validate.isTrue(byteData.hasArray()); // we made it, so it should
    final byte[] backing = byteData.array();
    final int start = byteData.arrayOffset();
    final int size = byteData.limit();
    final boolean exactFit = start == 0 && size == backing.length;
    if (exactFit) {
        return backing; // exact, just return it
    }
    // trim to size
    final byte[] trimmed = new byte[size];
    System.arraycopy(backing, start, trimmed, 0, size);
    return trimmed;
}
/**
 * Reads the response body fully into memory, if that has not already happened.
 * @return this response, for chaining
 */
@Override
public Connection.Response bufferUp() {
    this.prepareByteData();
    return this;
}
/**
 * Gets the response body as a stream. If the body has been buffered, a new stream over the buffered bytes is
 * returned; otherwise the live body stream is handed out, which can only be done once.
 * @return the body stream
 */
@Override
public BufferedInputStream bodyStream() {
    Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");

    if (byteData == null) {
        Validate.isFalse(inputStreamRead, "Request has already been read");
        Validate.notNull(bodyStream);
        inputStreamRead = true;
        return bodyStream.inputStream();
    }

    // we have read to bytes (via buffer up), so return those as a stream.
    final ByteArrayInputStream buffered = new ByteArrayInputStream(byteData.array(), 0, byteData.limit());
    return new BufferedInputStream(buffered, DefaultBufferSize);
}
// set up connection defaults, and details from request
private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException {
Proxy proxy = req.proxy();
final HttpURLConnection conn = (HttpURLConnection) (
proxy == null ?
req.url().openConnection() :
req.url().openConnection(proxy)
);
conn.setRequestMethod(req.method().name());
conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
conn.setConnectTimeout(req.timeout());
conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read
if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection)
((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory());
if (req.authenticator != null)
AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally
if (req.method().hasBody())
conn.setDoOutput(true);
CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store
for (Map.Entry> header : req.multiHeaders().entrySet()) {
for (String value : header.getValue()) {
conn.addRequestProperty(header.getKey(), value);
}
}
return conn;
}
/**
 * Call when the stream read is complete, to close the body (or error) stream and release the connection. The
 * connection.disconnect allows keep-alives to work (as the underlying connection is actually held open,
 * despite the name). Safe to call repeatedly: both references are cleared once released.
 */
private void safeClose() {
    if (bodyStream != null) {
        try {
            bodyStream.close();
        } catch (IOException ignored) {
            // no-op
        } finally {
            bodyStream = null;
        }
    }

    final HttpURLConnection connection = conn;
    if (connection != null) {
        connection.disconnect();
        conn = null;
    }
}
// set up url, method, header, cookies
private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException {
this.conn = conn;
this.req = request;
method = Method.valueOf(conn.getRequestMethod());
url = conn.getURL();
statusCode = conn.getResponseCode();
statusMessage = conn.getResponseMessage();
contentType = conn.getContentType();
Map> resHeaders = createHeaderMap(conn);
processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store
if (previousResponse != null) { // was redirected
// map previous response cookies into this response cookies() object
for (Map.Entry prevCookie : previousResponse.cookies().entrySet()) {
if (!hasCookie(prevCookie.getKey()))
cookie(prevCookie.getKey(), prevCookie.getValue());
}
previousResponse.safeClose();
// enforce too many redirects:
numRedirects = previousResponse.numRedirects + 1;
if (numRedirects >= MAX_REDIRECTS)
throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
}
}
private static LinkedHashMap> createHeaderMap(HttpURLConnection conn) {
// the default sun impl of conn.getHeaderFields() returns header values out of order
final LinkedHashMap> headers = new LinkedHashMap<>();
int i = 0;
while (true) {
final String key = conn.getHeaderFieldKey(i);
final String val = conn.getHeaderField(i);
if (key == null && val == null)
break;
i++;
if (key == null || val == null)
continue; // skip http1.1 line
final List vals = headers.computeIfAbsent(key, Functions.listFunction());
vals.add(val);
}
return headers;
}
void processResponseHeaders(Map> resHeaders) {
for (Map.Entry> entry : resHeaders.entrySet()) {
String name = entry.getKey();
if (name == null)
continue; // http/1.1 line
List values = entry.getValue();
for (String value : values) {
addHeader(name, fixHeaderEncoding(value));
}
}
}
/**
 Some servers send response header values encoded as UTF-8 rather than the RFC defined 8859. When the bytes of
 the value (taken back as 8859) look like UTF-8, the value is decoded again as UTF-8; otherwise it is returned
 untouched.
 * @param val a header value string that may have been incorrectly decoded as 8859; may be null.
 * @return the re-decoded string, the original string if no re-decoding was needed, or null if the input was null.
 */
@Nullable
private static String fixHeaderEncoding(@Nullable String val) {
    if (val == null)
        return null;

    final byte[] raw = val.getBytes(ISO_8859_1);
    return looksLikeUtf8(raw) ? new String(raw, UTF_8) : val;
}
/**
 * Tests whether a byte sequence is structurally consistent with UTF-8 and contains at least one multi-byte
 * sequence. A leading byte order mark is skipped and does not count as a multi-byte sequence.
 *
 * @param input the bytes to inspect
 * @return true if at least one non-ASCII byte was found after any BOM, and every non-ASCII byte is part of a
 * complete lead + continuation sequence; false otherwise (including for pure ASCII or empty input)
 */
private static boolean looksLikeUtf8(byte[] input) {
    final int length = input.length;
    final boolean hasBom = length >= 3
        && (input[0] & 0xFF) == 0xEF
        && (input[1] & 0xFF) == 0xBB
        && (input[2] & 0xFF) == 0xBF;

    int pos = hasBom ? 3 : 0;
    boolean sawMultiByte = false;
    while (pos < length) {
        final int lead = input[pos] & 0xFF;
        if (lead < 0x80) { // ASCII
            pos++;
            continue;
        }
        sawMultiByte = true;

        // work out how many continuation bytes the lead byte announces
        final int trailing;
        if ((lead & 0xE0) == 0xC0)
            trailing = 1;
        else if ((lead & 0xF0) == 0xE0)
            trailing = 2;
        else if ((lead & 0xF8) == 0xF0)
            trailing = 3;
        else
            return false; // not a valid lead byte

        if (pos + trailing >= length)
            return false; // sequence would run past the end of the input

        for (int k = 1; k <= trailing; k++) {
            if ((input[pos + k] & 0xC0) != 0x80)
                return false; // not a continuation byte
        }
        pos += trailing + 1;
    }
    return sawMultiByte;
}
/**
 * Ensures the request carries a Content-Type header before its body is written.
 * <p>If no Content-Type is set, one is added: multipart/form-data with a fresh boundary when the request data
 * needs multipart, otherwise the URL-encoded form type with the request's post data charset. If a Content-Type is
 * already set it is left alone, except that a multipart/form-data type lacking a boundary is replaced with one
 * that has a fresh boundary.</p>
 *
 * @param req the request whose Content-Type header is inspected and possibly set
 * @return the boundary generated by this call, or {@code null} if no boundary was generated
 */
private @Nullable static String setOutputContentType(final Connection.Request req) {
    final String existing = req.header(CONTENT_TYPE);

    if (existing == null) {
        if (!needsMultipart(req)) {
            req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset());
            return null;
        }
        final String generated = DataUtil.mimeBoundary();
        req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + generated);
        return generated;
    }

    // content type already set (e.g. for requestBody()), so normally nothing to add
    // todo - if content type already set, we could add charset
    // if user has set content type to multipart/form-data, auto add boundary.
    final boolean missingBoundary = existing.contains(MULTIPART_FORM_DATA) && !existing.contains("boundary");
    if (!missingBoundary)
        return null;

    final String generated = DataUtil.mimeBoundary();
    req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + generated);
    return generated;
}
/**
 * Writes the request body to the output stream, then closes the stream.
 * <p>Three shapes are produced: a multipart body when a boundary is supplied; the request's raw body string when
 * one is set; otherwise the request data as URL-encoded {@code key=value} pairs joined by {@code &}.</p>
 * <p>The writer (and with it the output stream) is closed when this method exits, whether normally or because a
 * write failed.</p>
 *
 * @param req the request supplying the data, the raw body and the post data charset
 * @param outputStream the stream to write the body to; closed by this method
 * @param boundary the multipart boundary, or {@code null} when the body is not multipart
 * @throws IOException if writing to the stream, or reading from an upload input stream, fails
 */
private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException {
    final Collection<Connection.KeyVal> data = req.data();
    final String charsetName = req.postDataCharset();

    try (BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(charsetName)))) {
        if (boundary != null) {
            // boundary will be set if we're in multipart mode
            for (Connection.KeyVal keyVal : data) {
                w.write("--");
                w.write(boundary);
                w.write("\r\n");
                w.write("Content-Disposition: form-data; name=\"");
                w.write(encodeMimeName(keyVal.key())); // encodes " to %22
                w.write("\"");
                final InputStream input = keyVal.inputStream();
                if (input != null) {
                    w.write("; filename=\"");
                    w.write(encodeMimeName(keyVal.value()));
                    w.write("\"\r\nContent-Type: ");
                    String contentType = keyVal.contentType();
                    w.write(contentType != null ? contentType : DefaultUploadType);
                    w.write("\r\n\r\n");
                    // the upload bytes go straight to the underlying stream, so the buffered part
                    // headers must be flushed first to keep the output in order
                    w.flush();
                    DataUtil.crossStreams(input, outputStream);
                    outputStream.flush();
                } else {
                    w.write("\r\n\r\n");
                    w.write(keyVal.value());
                }
                w.write("\r\n");
            }
            w.write("--");
            w.write(boundary);
            w.write("--");
        } else {
            String body = req.requestBody();
            if (body != null) {
                // data will be in query string, we're sending a plaintext body
                w.write(body);
            } else {
                // regular form data (application/x-www-form-urlencoded)
                boolean first = true;
                for (Connection.KeyVal keyVal : data) {
                    if (!first)
                        w.append('&');
                    else
                        first = false;

                    w.write(URLEncoder.encode(keyVal.key(), charsetName));
                    w.write('=');
                    w.write(URLEncoder.encode(keyVal.value(), charsetName));
                }
            }
        }
    }
}
/**
 * For requests whose data travels in the URL: appends every data key/value to the request URL, then clears the
 * request data as it now lives in the URL.
 *
 * @param req the request whose URL is rebuilt and whose data is cleared
 * @throws IOException declared for callers; see the URL builder for when it may be raised
 */
private static void serialiseRequestUrl(Connection.Request req) throws IOException {
    final UrlBuilder builder = new UrlBuilder(req.url());
    for (Connection.KeyVal param : req.data()) {
        final boolean isUpload = param.hasInputStream();
        Validate.isFalse(isUpload, "InputStream data not supported in URL query string.");
        builder.appendKeyVal(param);
    }

    req.url(builder.build());
    req.data().clear(); // moved into url as get params
}
}
/**
 * Decides whether the request must be sent in multipart mode, which is the case as soon as any of its data items
 * carries an input stream (i.e. a file upload).
 *
 * @param req the request whose data items are inspected
 * @return true if at least one data item has an input stream
 */
private static boolean needsMultipart(Connection.Request req) {
    return req.data().stream().anyMatch(Connection.KeyVal::hasInputStream);
}
/**
 * A request data item: a key with a string value, optionally accompanied by an input stream (in which case the
 * value is used as the upload's filename) and a content type.
 */
public static class KeyVal implements Connection.KeyVal {
    private String key;
    private String value;
    private @Nullable InputStream stream;
    private @Nullable String contentType;

    /**
     * Creates a plain key/value item.
     * @param key the item key; validated as a non-empty parameter
     * @param value the item value; validated as a non-null parameter
     * @return the new item
     */
    public static KeyVal create(String key, String value) {
        return new KeyVal(key, value);
    }

    /**
     * Creates an item backed by an input stream.
     * @param key the item key; validated as a non-empty parameter
     * @param filename stored as the item's value; validated as a non-null parameter
     * @param stream the stream supplying the content; validated as a non-null parameter
     * @return the new item
     */
    public static KeyVal create(String key, String filename, InputStream stream) {
        return new KeyVal(key, filename)
            .inputStream(stream);
    }

    private KeyVal(String key, String value) {
        Validate.notEmptyParam(key, "key");
        Validate.notNullParam(value, "value");
        this.key = key;
        this.value = value;
    }

    @Override
    public KeyVal key(String key) {
        Validate.notEmptyParam(key, "key");
        this.key = key;
        return this;
    }

    @Override
    public String key() {
        return key;
    }

    @Override
    public KeyVal value(String value) {
        Validate.notNullParam(value, "value");
        this.value = value;
        return this;
    }

    @Override
    public String value() {
        return value;
    }

    /**
     * Sets the input stream backing this item.
     * @param inputStream the stream to use; validated as a non-null parameter
     * @return this item, for chaining
     */
    public KeyVal inputStream(InputStream inputStream) {
        // validate the argument itself; checking the (always non-null) value field here would let a
        // null stream through unnoticed
        Validate.notNullParam(inputStream, "inputStream");
        this.stream = inputStream;
        return this;
    }

    @Override
    public InputStream inputStream() {
        return stream;
    }

    @Override
    public boolean hasInputStream() {
        return stream != null;
    }

    @Override
    public Connection.KeyVal contentType(String contentType) {
        Validate.notEmpty(contentType);
        this.contentType = contentType;
        return this;
    }

    @Override
    public String contentType() {
        return contentType;
    }

    @Override
    public String toString() {
        return key + "=" + value;
    }
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy