// org.jsoup.Connection Maven / Gradle / Ivy
package org.jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import javax.annotation.Nullable;
import javax.net.ssl.SSLSocketFactory;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.CookieStore;
import java.net.Proxy;
import java.net.URL;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/**
The Connection interface is a convenient HTTP client and session object to fetch content from the web, and parse them
into Documents.
To start a new session, use either {@link org.jsoup.Jsoup#newSession()} or {@link org.jsoup.Jsoup#connect(String)}.
Connections contain {@link Connection.Request} and {@link Connection.Response} objects (once executed). Configuration
settings (URL, timeout, useragent, etc) set on a session will be applied by default to each subsequent request.
To start a new request from the session, use {@link #newRequest()}.
Cookies are stored in memory for the duration of the session. For that reason, do not use one single session for all
requests in a long-lived application, or you are likely to run out of memory, unless care is taken to clean up the
cookie store. The cookie store for the session is available via {@link #cookieStore()}. You may provide your own
implementation via {@link #cookieStore(java.net.CookieStore)} before making requests.
Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}),
or by methods in the Connection.Request object directly. All request configuration must be made before the request is
executed. When used as an ongoing session, initialize all defaults prior to making multi-threaded {@link
#newRequest()}s.
Note that the term "Connection" used here does not mean that a long-lived connection is held against a server for
the lifetime of the Connection object. A socket connection is only made at the point of request execution ({@link
#execute()}, {@link #get()}, or {@link #post()}), and the server's response consumed.
For multi-threaded implementations, it is important to use a {@link #newRequest()} for each request. The session may
be shared across threads but a given request, not.
*/
@SuppressWarnings("unused")
public interface Connection {
/**
* GET and POST http methods.
*/
enum Method {
GET(false), POST(true), PUT(true), DELETE(false), PATCH(true), HEAD(false), OPTIONS(false), TRACE(false);
private final boolean hasBody;
Method(boolean hasBody) {
this.hasBody = hasBody;
}
/**
* Check if this HTTP method has/needs a request body
* @return if body needed
*/
public final boolean hasBody() {
return hasBody;
}
}
/**
Creates a new request, using this Connection as the session-state and to initialize the connection settings (which may then be independently on the returned Connection.Request object).
@return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request
@since 1.14.1
*/
Connection newRequest();
/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
* @param url URL to connect to
* @return this Connection, for chaining
*/
Connection url(URL url);
/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
* @param url URL to connect to
* @return this Connection, for chaining
*/
Connection url(String url);
/**
* Set the proxy to use for this request. Set to null
to disable a previously set proxy.
* @param proxy proxy to use
* @return this Connection, for chaining
*/
Connection proxy(@Nullable Proxy proxy);
/**
* Set the HTTP proxy to use for this request.
* @param host the proxy hostname
* @param port the proxy port
* @return this Connection, for chaining
*/
Connection proxy(String host, int port);
/**
* Set the request user-agent header.
* @param userAgent user-agent to use
* @return this Connection, for chaining
* @see org.jsoup.helper.HttpConnection#DEFAULT_UA
*/
Connection userAgent(String userAgent);
/**
* Set the total request timeout duration. If a timeout occurs, an {@link java.net.SocketTimeoutException} will be thrown.
* The default timeout is 30 seconds (30,000 millis). A timeout of zero is treated as an infinite timeout.
*
Note that this timeout specifies the combined maximum duration of the connection time and the time to read
* the full response.
* @param millis number of milliseconds (thousandths of a second) before timing out connects or reads.
* @return this Connection, for chaining
* @see #maxBodySize(int)
*/
Connection timeout(int millis);
/**
* Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed,
* and the input truncated (i.e. the body content will be trimmed). The default maximum is 2MB. A max size of
* 0
is treated as an infinite amount (bounded only by your patience and the memory available on your
* machine).
*
* @param bytes number of bytes to read from the input before truncating
* @return this Connection, for chaining
*/
Connection maxBodySize(int bytes);
/**
* Set the request referrer (aka "referer") header.
* @param referrer referrer to use
* @return this Connection, for chaining
*/
Connection referrer(String referrer);
/**
* Configures the connection to (not) follow server redirects. By default this is true.
* @param followRedirects true if server redirects should be followed.
* @return this Connection, for chaining
*/
Connection followRedirects(boolean followRedirects);
/**
* Set the request method to use, GET or POST. Default is GET.
* @param method HTTP request method
* @return this Connection, for chaining
*/
Connection method(Method method);
/**
* Configures the connection to not throw exceptions when a HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By
* default this is false; an IOException is thrown if an error is encountered. If set to true, the
* response is populated with the error body, and the status message will reflect the error.
* @param ignoreHttpErrors - false (default) if HTTP errors should be ignored.
* @return this Connection, for chaining
*/
Connection ignoreHttpErrors(boolean ignoreHttpErrors);
/**
* Ignore the document's Content-Type when parsing the response. By default this is false, an unrecognised
* content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse
* a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type.
* @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a
* Document.
* @return this Connection, for chaining
*/
Connection ignoreContentType(boolean ignoreContentType);
/**
* Set custom SSL socket factory
* @param sslSocketFactory custom SSL socket factory
* @return this Connection, for chaining
*/
Connection sslSocketFactory(SSLSocketFactory sslSocketFactory);
/**
* Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the
* request body for POSTs. A request may have multiple values of the same name.
* @param key data key
* @param value data value
* @return this Connection, for chaining
*/
Connection data(String key, String value);
/**
* Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the
* input stream.
* @param key data key (form item name)
* @param filename the name of the file to present to the remove server. Typically just the name, not path,
* component.
* @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}.
* You must close the InputStream in a {@code finally} block.
* @return this Connections, for chaining
* @see #data(String, String, InputStream, String) if you want to set the uploaded file's mimetype.
*/
Connection data(String key, String filename, InputStream inputStream);
/**
* Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the
* input stream.
* @param key data key (form item name)
* @param filename the name of the file to present to the remove server. Typically just the name, not path,
* component.
* @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}.
* @param contentType the Content Type (aka mimetype) to specify for this file.
* You must close the InputStream in a {@code finally} block.
* @return this Connections, for chaining
*/
Connection data(String key, String filename, InputStream inputStream, String contentType);
/**
* Adds all of the supplied data to the request data parameters
* @param data collection of data parameters
* @return this Connection, for chaining
*/
Connection data(Collection data);
/**
* Adds all of the supplied data to the request data parameters
* @param data map of data parameters
* @return this Connection, for chaining
*/
Connection data(Map data);
/**
Add one or more request {@code key, val} data parameter pairs.Multiple parameters may be set at once, e.g.:
.data("name", "jsoup", "language", "Java", "language", "English");
creates a query string like:
{@literal ?name=jsoup&language=Java&language=English}
For GET requests, data parameters will be sent on the request query string. For POST (and other methods that
contain a body), they will be sent as body form parameters, unless the body is explicitly set by {@link
#requestBody(String)}, in which case they will be query string parameters.
@param keyvals a set of key value pairs.
@return this Connection, for chaining
*/
Connection data(String... keyvals);
/**
* Get the data KeyVal for this key, if any
* @param key the data key
* @return null if not set
*/
@Nullable KeyVal data(String key);
/**
* Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set for URL
* encoded form key/value pairs. E.g.:
* Jsoup.connect(url)
* .requestBody(json)
* .header("Content-Type", "application/json")
* .post();
* If any data key/vals are supplied, they will be sent as URL query params.
* @return this Request, for chaining
*/
Connection requestBody(String body);
/**
* Set a request header.
* @param name header name
* @param value header value
* @return this Connection, for chaining
* @see org.jsoup.Connection.Request#headers()
*/
Connection header(String name, String value);
/**
* Adds each of the supplied headers to the request.
* @param headers map of headers name {@literal ->} value pairs
* @return this Connection, for chaining
* @see org.jsoup.Connection.Request#headers()
*/
Connection headers(Map headers);
/**
* Set a cookie to be sent in the request.
* @param name name of cookie
* @param value value of cookie
* @return this Connection, for chaining
*/
Connection cookie(String name, String value);
/**
* Adds each of the supplied cookies to the request.
* @param cookies map of cookie name {@literal ->} value pairs
* @return this Connection, for chaining
*/
Connection cookies(Map cookies);
/**
Provide a custom or pre-filled CookieStore to be used on requests made by this Connection.
@param cookieStore a cookie store to use for subsequent requests
@return this Connection, for chaining
@since 1.14.1
*/
Connection cookieStore(CookieStore cookieStore);
/**
Get the cookie store used by this Connection.
@return the cookie store
@since 1.14.1
*/
CookieStore cookieStore();
/**
* Provide an alternate parser to use when parsing the response to a Document. If not set, defaults to the HTML
* parser, unless the response content-type is XML, in which case the XML parser is used.
* @param parser alternate parser
* @return this Connection, for chaining
*/
Connection parser(Parser parser);
/**
* Sets the default post data character set for x-www-form-urlencoded post data
* @param charset character set to encode post data
* @return this Connection, for chaining
*/
Connection postDataCharset(String charset);
/**
* Execute the request as a GET, and parse the result.
* @return parsed Document
* @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed
* @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored
* @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored
* @throws java.net.SocketTimeoutException if the connection times out
* @throws IOException on error
*/
Document get() throws IOException;
/**
* Execute the request as a POST, and parse the result.
* @return parsed Document
* @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed
* @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored
* @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored
* @throws java.net.SocketTimeoutException if the connection times out
* @throws IOException on error
*/
Document post() throws IOException;
/**
* Execute the request.
* @return a response object
* @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed
* @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored
* @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored
* @throws java.net.SocketTimeoutException if the connection times out
* @throws IOException on error
*/
Response execute() throws IOException;
/**
* Get the request object associated with this connection
* @return request
*/
Request request();
/**
* Set the connection's request
* @param request new request object
* @return this Connection, for chaining
*/
Connection request(Request request);
/**
* Get the response, once the request has been executed.
* @return response
* @throws IllegalArgumentException if called before the response has been executed.
*/
Response response();
/**
* Set the connection's response
* @param response new response
* @return this Connection, for chaining
*/
Connection response(Response response);
/**
* Common methods for Requests and Responses
* @param Type of Base, either Request or Response
*/
@SuppressWarnings("UnusedReturnValue")
interface Base> {
/**
* Get the URL of this Request or Response. For redirected responses, this will be the final destination URL.
* @return URL
* @throws IllegalArgumentException if called on a Request that was created without a URL.
*/
URL url();
/**
* Set the URL
* @param url new URL
* @return this, for chaining
*/
T url(URL url);
/**
* Get the request method, which defaults to GET
* @return method
*/
Method method();
/**
* Set the request method
* @param method new method
* @return this, for chaining
*/
T method(Method method);
/**
* Get the value of a header. If there is more than one header value with the same name, the headers are returned
* comma seperated, per rfc2616-sec4.
*
* Header names are case insensitive.
*
* @param name name of header (case insensitive)
* @return value of header, or null if not set.
* @see #hasHeader(String)
* @see #cookie(String)
*/
@Nullable String header(String name);
/**
* Get the values of a header.
* @param name header name, case insensitive.
* @return a list of values for this header, or an empty list if not set.
*/
List headers(String name);
/**
* Set a header. This method will overwrite any existing header with the same case insensitive name. (If there
* is more than one value for this header, this method will update the first matching header.
* @param name Name of header
* @param value Value of header
* @return this, for chaining
* @see #addHeader(String, String)
*/
T header(String name, String value);
/**
* Add a header. The header will be added regardless of whether a header with the same name already exists.
* @param name Name of new header
* @param value Value of new header
* @return this, for chaining
*/
T addHeader(String name, String value);
/**
* Check if a header is present
* @param name name of header (case insensitive)
* @return if the header is present in this request/response
*/
boolean hasHeader(String name);
/**
* Check if a header is present, with the given value
* @param name header name (case insensitive)
* @param value value (case insensitive)
* @return if the header and value pair are set in this req/res
*/
boolean hasHeaderWithValue(String name, String value);
/**
* Remove headers by name. If there is more than one header with this name, they will all be removed.
* @param name name of header to remove (case insensitive)
* @return this, for chaining
*/
T removeHeader(String name);
/**
* Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple
* values, only the first header is returned.
* Note that this is a view of the headers only, and changes made to this map will not be reflected in the
* request/response object.
* @return headers
* @see #multiHeaders()
*/
Map headers();
/**
* Retreive all of the headers, keyed by the header name, and with a list of values per header.
* @return a list of multiple values per header.
*/
Map> multiHeaders();
/**
* Get a cookie value by name from this request/response.
*
* Response objects have a simplified cookie model. Each cookie set in the response is added to the response
* object's cookie key=value map. The cookie's path, domain, and expiry date are ignored.
*
* @param name name of cookie to retrieve.
* @return value of cookie, or null if not set
*/
@Nullable String cookie(String name);
/**
* Set a cookie in this request/response.
* @param name name of cookie
* @param value value of cookie
* @return this, for chaining
*/
T cookie(String name, String value);
/**
* Check if a cookie is present
* @param name name of cookie
* @return if the cookie is present in this request/response
*/
boolean hasCookie(String name);
/**
* Remove a cookie by name
* @param name name of cookie to remove
* @return this, for chaining
*/
T removeCookie(String name);
/**
* Retrieve all of the request/response cookies as a map
* @return cookies
*/
Map cookies();
}
/**
* Represents a HTTP request.
*/
@SuppressWarnings("UnusedReturnValue")
interface Request extends Base {
/**
* Get the proxy used for this request.
* @return the proxy; null
if not enabled.
*/
@Nullable Proxy proxy();
/**
* Update the proxy for this request.
* @param proxy the proxy ot use; null
to disable.
* @return this Request, for chaining
*/
Request proxy(@Nullable Proxy proxy);
/**
* Set the HTTP proxy to use for this request.
* @param host the proxy hostname
* @param port the proxy port
* @return this Connection, for chaining
*/
Request proxy(String host, int port);
/**
* Get the request timeout, in milliseconds.
* @return the timeout in milliseconds.
*/
int timeout();
/**
* Update the request timeout.
* @param millis timeout, in milliseconds
* @return this Request, for chaining
*/
Request timeout(int millis);
/**
* Get the maximum body size, in bytes.
* @return the maximum body size, in bytes.
*/
int maxBodySize();
/**
* Update the maximum body size, in bytes.
* @param bytes maximum body size, in bytes.
* @return this Request, for chaining
*/
Request maxBodySize(int bytes);
/**
* Get the current followRedirects configuration.
* @return true if followRedirects is enabled.
*/
boolean followRedirects();
/**
* Configures the request to (not) follow server redirects. By default this is true.
* @param followRedirects true if server redirects should be followed.
* @return this Request, for chaining
*/
Request followRedirects(boolean followRedirects);
/**
* Get the current ignoreHttpErrors configuration.
* @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be
* thrown.
*/
boolean ignoreHttpErrors();
/**
* Configures the request to ignore HTTP errors in the response.
* @param ignoreHttpErrors set to true to ignore HTTP errors.
* @return this Request, for chaining
*/
Request ignoreHttpErrors(boolean ignoreHttpErrors);
/**
* Get the current ignoreContentType configuration.
* @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to
* be thrown.
*/
boolean ignoreContentType();
/**
* Configures the request to ignore the Content-Type of the response.
* @param ignoreContentType set to true to ignore the content type.
* @return this Request, for chaining
*/
Request ignoreContentType(boolean ignoreContentType);
/**
* Get the current custom SSL socket factory, if any.
* @return custom SSL socket factory if set, null otherwise
*/
@Nullable SSLSocketFactory sslSocketFactory();
/**
* Set a custom SSL socket factory.
* @param sslSocketFactory SSL socket factory
*/
void sslSocketFactory(SSLSocketFactory sslSocketFactory);
/**
* Add a data parameter to the request
* @param keyval data to add.
* @return this Request, for chaining
*/
Request data(KeyVal keyval);
/**
* Get all of the request's data parameters
* @return collection of keyvals
*/
Collection data();
/**
* Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set for URL
* encoded form key/value pairs. E.g.:
* Jsoup.connect(url)
* .requestBody(json)
* .header("Content-Type", "application/json")
* .post();
* If any data key/vals are supplied, they will be sent as URL query params.
* @param body to use as the request body. Set to null to clear a previously set body.
* @return this Request, for chaining
*/
Request requestBody(@Nullable String body);
/**
* Get the current request body.
* @return null if not set.
*/
@Nullable String requestBody();
/**
* Specify the parser to use when parsing the document.
* @param parser parser to use.
* @return this Request, for chaining
*/
Request parser(Parser parser);
/**
* Get the current parser to use when parsing the document.
* @return current Parser
*/
Parser parser();
/**
* Sets the post data character set for x-www-form-urlencoded post data
* @param charset character set to encode post data
* @return this Request, for chaining
*/
Request postDataCharset(String charset);
/**
* Gets the post data character set for x-www-form-urlencoded post data
* @return character set to encode post data
*/
String postDataCharset();
}
/**
* Represents a HTTP response.
*/
interface Response extends Base {
/**
* Get the status code of the response.
* @return status code
*/
int statusCode();
/**
* Get the status message of the response.
* @return status message
*/
String statusMessage();
/**
* Get the character set name of the response, derived from the content-type header.
* @return character set name if set, null if not
*/
@Nullable String charset();
/**
* Set / override the response character set. When the document body is parsed it will be with this charset.
* @param charset to decode body as
* @return this Response, for chaining
*/
Response charset(String charset);
/**
* Get the response content type (e.g. "text/html");
* @return the response content type, or null if one was not set
*/
@Nullable String contentType();
/**
* Read and parse the body of the response as a Document. If you intend to parse the same response multiple
* times, you should {@link #bufferUp()} first.
* @return a parsed Document
* @throws IOException on error
*/
Document parse() throws IOException;
/**
* Get the body of the response as a plain string.
* @return body
*/
String body();
/**
* Get the body of the response as an array of bytes.
* @return body bytes
*/
byte[] bodyAsBytes();
/**
* Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the
* same connection response (otherwise, once the response is read, its InputStream will have been drained and
* may not be re-read). Calling {@link #body() } or {@link #bodyAsBytes()} has the same effect.
* @return this response, for chaining
* @throws UncheckedIOException if an IO exception occurs during buffering.
*/
Response bufferUp();
/**
* Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done with it.
* Other body methods (like bufferUp, body, parse, etc) will not work in conjunction with this method.
* This method is useful for writing large responses to disk, without buffering them completely into memory first.
* @return the response body input stream
*/
BufferedInputStream bodyStream();
}
/**
* A Key:Value tuple(+), used for form data.
*/
interface KeyVal {
/**
* Update the key of a keyval
* @param key new key
* @return this KeyVal, for chaining
*/
KeyVal key(String key);
/**
* Get the key of a keyval
* @return the key
*/
String key();
/**
* Update the value of a keyval
* @param value the new value
* @return this KeyVal, for chaining
*/
KeyVal value(String value);
/**
* Get the value of a keyval
* @return the value
*/
String value();
/**
* Add or update an input stream to this keyVal
* @param inputStream new input stream
* @return this KeyVal, for chaining
*/
KeyVal inputStream(InputStream inputStream);
/**
* Get the input stream associated with this keyval, if any
* @return input stream if set, or null
*/
@Nullable InputStream inputStream();
/**
* Does this keyval have an input stream?
* @return true if this keyval does indeed have an input stream
*/
boolean hasInputStream();
/**
* Set the Content Type header used in the MIME body (aka mimetype) when uploading files.
* Only useful if {@link #inputStream(InputStream)} is set.
* Will default to {@code application/octet-stream}.
* @param contentType the new content type
* @return this KeyVal
*/
KeyVal contentType(String contentType);
/**
* Get the current Content Type, or {@code null} if not set.
* @return the current Content Type.
*/
@Nullable String contentType();
}
}