// com.gargoylesoftware.htmlunit.WebRequest (htmlunit artifact; Maven / Gradle / Ivy)
// Web-page navigation text captured with this listing: "Show all versions of htmlunit", "Show documentation".
/*
* Copyright (c) 2002-2015 Gargoyle Software Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.gargoylesoftware.htmlunit;
import java.io.Serializable;
import java.net.IDN;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import com.gargoylesoftware.htmlunit.util.UrlUtils;
/**
* Parameter object for making web requests.
*
* @version $Revision: 10040 $
* @author Brad Clarke
* @author Hans Donner
* @author Ahmed Ashour
* @author Marc Guillemot
* @author Rodney Gitzel
* @author Ronald Brill
*/
public class WebRequest implements Serializable {
// private static final Log LOG = LogFactory.getLog(WebRequest.class);

/** Matches a "/./" path segment. */
private static final Pattern DOT_PATTERN = Pattern.compile("/\\./");
/** Matches "/segment/../" where the segment does not start with ".." (negative lookahead). */
private static final Pattern DOT_DOT_PATTERN = Pattern.compile("/(?!\\.\\.)[^/]*/\\.\\./");
/** Matches the leading slash together with any run of "./" and "../" segments that directly follows it. */
private static final Pattern REMOVE_DOTS_PATTERN = Pattern.compile("^/(\\.\\.?/)*");

private String url_; // String instead of java.net.URL because "about:blank" URLs don't serialize correctly
private String proxyHost_;
private int proxyPort_;
private boolean isSocksProxy_;
private HttpMethod httpMethod_ = HttpMethod.GET;
private FormEncodingType encodingType_ = FormEncodingType.URL_ENCODED;
/** Additional HTTP headers, keyed by header name (names are matched case-insensitively by the accessors). */
private Map<String, String> additionalHeaders_ = new HashMap<>();
/** Credentials parsed from the user-info part of the URL, if any. */
private Credentials urlCredentials_;
/** Credentials supplied explicitly through {@link #setCredentials(Credentials)}. */
private Credentials credentials_;
private String charset_ = TextUtil.DEFAULT_CHARSET;

/* These two are mutually exclusive; additionally, requestBody_ should only be set for POST or PUT requests. */
private List<NameValuePair> requestParameters_ = Collections.emptyList();
private String requestBody_;
/**
 * Creates a {@link WebRequest} aimed at the given URL that sends the given <tt>Accept</tt> header.
 * An <tt>Accept-Encoding</tt> header of "gzip, deflate" is registered as well.
 * @param url the target URL
 * @param acceptHeader the value to send as the <tt>Accept</tt> header
 */
public WebRequest(final URL url, final String acceptHeader) {
    this.setUrl(url);
    this.setAdditionalHeader("Accept", acceptHeader);
    this.setAdditionalHeader("Accept-Encoding", "gzip, deflate");
}
/**
 * Instantiates a {@link WebRequest} for the specified URL.
 * Delegates to {@link #WebRequest(URL, String)} with a wildcard <tt>Accept</tt> header value.
 * @param url the target URL
 */
public WebRequest(final URL url) {
this(url, "*/*");
}
/**
 * Creates a {@link WebRequest} aimed at the given URL that will be submitted with the given HTTP method.
 * @param url the target URL
 * @param submitMethod the HTTP submit method to use
 */
public WebRequest(final URL url, final HttpMethod submitMethod) {
    this(url);
    this.setHttpMethod(submitMethod);
}
/**
 * Returns the target URL, rebuilt from the string form kept internally
 * through {@link UrlUtils#toUrlSafe(String)}.
 * @return the target URL
 */
public URL getUrl() {
    final URL target = UrlUtils.toUrlSafe(url_);
    return target;
}
/**
 * Sets the target URL. The URL may be simplified if needed (for instance eliminating
 * irrelevant path portions like "/./"): an empty path is replaced by "/" for http(s) URLs
 * and for URLs carrying a query, dot segments are stripped from the path, and the host
 * name is converted with {@link IDN#toASCII(String)}.
 * <p>Credentials embedded in the URL (<tt>http://user:password@host</tt>) are extracted and
 * exposed through {@link #getUrlCredentials()}. They are read from the URL as passed in,
 * so they survive the simplifications above.</p>
 * @param url the target URL; <code>null</code> clears the stored URL
 * @throws RuntimeException if a simplified URL cannot be constructed
 */
public void setUrl(URL url) {
    if (url == null) {
        url_ = null;
        return;
    }

    // Read the user info BEFORE any rewrite: the rewrites below go through the
    // URL(protocol, host, port, file) constructor, which has no user-info component,
    // so a rebuilt URL would report null here and the credentials would be lost.
    final String userInfo = url.getUserInfo();

    final String path = url.getPath();
    if (path.isEmpty()) {
        if (!url.getFile().isEmpty() || url.getProtocol().startsWith("http")) {
            url = buildUrlWithNewFile(url, "/" + url.getFile());
        }
    }
    else if (path.contains("/.")) {
        final String query = (url.getQuery() != null) ? "?" + url.getQuery() : "";
        url = buildUrlWithNewFile(url, removeDots(path) + query);
    }

    final String idn = IDN.toASCII(url.getHost());
    if (!idn.equals(url.getHost())) {
        try {
            // NOTE(review): getFile() is path + query only, so a fragment appears to be dropped here - confirm.
            url = new URL(url.getProtocol(), idn, url.getPort(), url.getFile());
        }
        catch (final Exception e) {
            throw new RuntimeException("Cannot change hostname of URL: " + url.toExternalForm(), e);
        }
    }
    url_ = url.toExternalForm();

    // http://john.smith:secret@localhost
    if (userInfo != null) {
        final int splitPos = userInfo.indexOf(':');
        if (splitPos == -1) {
            // user name only: use an empty password
            urlCredentials_ = new UsernamePasswordCredentials(userInfo, "");
        }
        else {
            final String username = userInfo.substring(0, splitPos);
            final String password = userInfo.substring(splitPos + 1);
            urlCredentials_ = new UsernamePasswordCredentials(username, password);
        }
    }
}
/*
 * Removes "/./" and "/../" segments from a URL path.
 *
 * A single replaceAll() pass does not re-scan the text it has just produced, so a
 * replacement can expose a new match (e.g. "/a/b/../../"). Each pattern is therefore
 * re-applied until a pass leaves the path unchanged.
 */
private String removeDots(final String path) {
    // leading "./" and "../" segments collapse into the root slash
    String cleaned = REMOVE_DOTS_PATTERN.matcher(path).replaceAll("/");
    if ("/..".equals(cleaned)) {
        cleaned = "/";
    }

    // "/./" never changes the location: drop every occurrence
    String before;
    do {
        before = cleaned;
        cleaned = DOT_PATTERN.matcher(before).replaceAll("/");
    }
    while (!cleaned.equals(before));

    // "/dir/../" collapses to "/", unless "dir" itself starts with ".."
    // (the pattern's negative lookahead takes care of that)
    do {
        before = cleaned;
        cleaned = DOT_DOT_PATTERN.matcher(before).replaceAll("/");
    }
    while (!cleaned.equals(before));

    return cleaned;
}
/**
 * Builds a copy of the given URL (same protocol, host and port) whose file part is replaced.
 * The fragment of the original URL, if any, is appended to the new file part.
 * @param url the URL to copy protocol, host, port and fragment from
 * @param newFile the new file part (path plus optional query)
 * @return the rebuilt URL
 * @throws RuntimeException if the rebuilt URL cannot be constructed
 */
private URL buildUrlWithNewFile(URL url, String newFile) {
    final URL rebuilt;
    try {
        String file = newFile;
        final String fragment = url.getRef();
        if (fragment != null) {
            file = file + '#' + fragment;
        }
        // file URLs whose authority ends with ':' get the colon put back in front of the file part
        // (presumably drive-letter style URLs - confirm)
        final String authority = url.getAuthority();
        if ("file".equals(url.getProtocol()) && authority != null && authority.endsWith(":")) {
            file = ":" + file;
        }
        rebuilt = new URL(url.getProtocol(), url.getHost(), url.getPort(), file);
    }
    catch (final Exception e) {
        throw new RuntimeException("Cannot set URL: " + url.toExternalForm(), e);
    }
    return rebuilt;
}
/**
 * Gives the host name of the proxy configured for this request.
 * @return the proxy host to use
 */
public String getProxyHost() {
    return this.proxyHost_;
}
/**
 * Configures the host name of the proxy for this request.
 * @param proxyHost the proxy host to use
 */
public void setProxyHost(final String proxyHost) {
    this.proxyHost_ = proxyHost;
}
/**
 * Gives the port of the proxy configured for this request.
 * @return the proxy port to use
 */
public int getProxyPort() {
    return this.proxyPort_;
}
/**
 * Configures the port of the proxy for this request.
 * @param proxyPort the proxy port to use
 */
public void setProxyPort(final int proxyPort) {
    this.proxyPort_ = proxyPort;
}
/**
 * Tells whether the configured proxy is a SOCKS proxy.
 * @return whether SOCKS proxy or not
 */
public boolean isSocksProxy() {
    return this.isSocksProxy_;
}
/**
 * Records whether the configured proxy is a SOCKS proxy.
 * @param isSocksProxy whether SOCKS proxy or not
 */
public void setSocksProxy(final boolean isSocksProxy) {
    this.isSocksProxy_ = isSocksProxy;
}
/**
 * Gives the form encoding type configured for this request
 * ({@link FormEncodingType#URL_ENCODED} unless changed).
 * @return the form encoding type to use
 */
public FormEncodingType getEncodingType() {
    return this.encodingType_;
}
/**
 * Configures the form encoding type for this request.
 * @param encodingType the form encoding type to use
 */
public void setEncodingType(final FormEncodingType encodingType) {
    this.encodingType_ = encodingType;
}
/**
 * Retrieves the request parameters to use. If set, these request parameters will overwrite any
 * request parameters which may be present in the {@link #getUrl() URL}. Should not be used in
 * combination with the {@link #setRequestBody(String) request body}.
 * The list held by this request is returned directly, not a copy.
 * @return the request parameters to use
 */
public List<NameValuePair> getRequestParameters() {
    return requestParameters_;
}
/**
 * Sets the request parameters to use. If set, these request parameters will overwrite any request
 * parameters which may be present in the {@link #getUrl() URL}. Should not be used in combination
 * with the {@link #setRequestBody(String) request body}.
 * @param requestParameters the request parameters to use
 * @throws RuntimeException if the request body has already been set
 */
public void setRequestParameters(final List<NameValuePair> requestParameters) throws RuntimeException {
    if (requestBody_ != null) {
        // note the space after the semicolon: the two halves are concatenated into one sentence
        final String msg = "Trying to set the request parameters, but the request body has already been specified; "
            + "the two are mutually exclusive!";
        throw new RuntimeException(msg);
    }
    requestParameters_ = requestParameters;
}
/**
 * Gives the body content to submit with a POST or PUT request. Should not be used in
 * combination with {@link #setRequestParameters(List) request parameters}.
 * @return the body content to be submitted, or <code>null</code> if none has been set
 */
public String getRequestBody() {
    return this.requestBody_;
}
/**
 * Sets the body content to be submitted if this is a POST or PUT request.
 * Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
 * The HTTP method must already be set to POST or PUT when this is called.
 * @param requestBody the body content to be submitted if this is a POST or PUT request
 * @throws RuntimeException if the request parameters have already been set
 *                          or this is not a POST or PUT request
 */
public void setRequestBody(final String requestBody) throws RuntimeException {
    if (requestParameters_ != null && !requestParameters_.isEmpty()) {
        // note the space after the semicolon: the two halves are concatenated into one sentence
        final String msg = "Trying to set the request body, but the request parameters have already been specified; "
            + "the two are mutually exclusive!";
        throw new RuntimeException(msg);
    }
    if (httpMethod_ != HttpMethod.POST && httpMethod_ != HttpMethod.PUT) {
        final String msg = "The request body may only be set for POST or PUT requests!";
        throw new RuntimeException(msg);
    }
    requestBody_ = requestBody;
}
/**
 * Gives the HTTP method this request will be submitted with
 * ({@link HttpMethod#GET} unless changed).
 * @return the HTTP submit method to use
 */
public HttpMethod getHttpMethod() {
    return this.httpMethod_;
}
/**
 * Configures the HTTP method this request will be submitted with.
 * @param submitMethod the HTTP submit method to use
 */
public void setHttpMethod(final HttpMethod submitMethod) {
    this.httpMethod_ = submitMethod;
}
/**
 * Returns the additional HTTP headers to use, keyed by header name.
 * The map held by this request is returned directly, not a copy.
 * @return the additional HTTP headers to use
 */
public Map<String, String> getAdditionalHeaders() {
    return additionalHeaders_;
}
/**
 * Sets the additional HTTP headers to use, replacing the map held so far.
 * The given map is stored as is, not copied.
 * @param additionalHeaders the additional HTTP headers to use, keyed by header name
 */
public void setAdditionalHeaders(final Map<String, String> additionalHeaders) {
    additionalHeaders_ = additionalHeaders;
}
/**
 * Tells whether a header with the given name is present among the additional HTTP headers.
 * Header names are compared ignoring case.
 * @param name the name of the additional HTTP header
 * @return true if the specified header name is included in the additional HTTP headers
 */
public boolean isAdditionalHeader(final String name) {
    boolean present = false;
    for (final String headerName : additionalHeaders_.keySet()) {
        if (name.equalsIgnoreCase(headerName)) {
            present = true;
            break;
        }
    }
    return present;
}
/**
 * Stores the given name/value pair in the additional HTTP headers.
 * If a header whose name differs only in case is already present, its existing key is reused,
 * so the value is replaced instead of a second entry being added.
 * @param name the name of the additional HTTP header
 * @param value the value of the additional HTTP header
 */
public void setAdditionalHeader(String name, final String value) {
    String keyToUse = name;
    for (final String existingName : additionalHeaders_.keySet()) {
        if (name.equalsIgnoreCase(existingName)) {
            keyToUse = existingName;
            break;
        }
    }
    additionalHeaders_.put(keyToUse, value);
}
/**
 * Removes the header with the given name from the additional HTTP headers.
 * The name is matched ignoring case against the stored header names.
 * @param name the name of the additional HTTP header
 */
public void removeAdditionalHeader(String name) {
    String keyToRemove = name;
    for (final String existingName : additionalHeaders_.keySet()) {
        if (name.equalsIgnoreCase(existingName)) {
            keyToRemove = existingName;
            break;
        }
    }
    additionalHeaders_.remove(keyToRemove);
}
/**
 * Gives the credentials that were extracted from the user-info part of the URL.
 * @return the credentials if set as part of the url
 */
public Credentials getUrlCredentials() {
    return this.urlCredentials_;
}
/**
 * Gives the credentials that were supplied through {@link #setCredentials(Credentials)}.
 * @return the credentials if set from the external builder
 */
public Credentials getCredentials() {
    return this.credentials_;
}
/**
 * Supplies the credentials to use for this request.
 * @param credentials the credentials to use
 */
public void setCredentials(final Credentials credentials) {
    this.credentials_ = credentials;
}
/**
 * Gives the character set used to perform the request
 * ({@link TextUtil#DEFAULT_CHARSET} unless changed).
 * @return the character set to use to perform the request
 */
public String getCharset() {
    return this.charset_;
}
/**
 * Configures the character set used to perform the request. When never called,
 * {@link TextUtil#DEFAULT_CHARSET} applies.
 * @param charset the character set to use to perform the request
 */
public void setCharset(final String charset) {
    this.charset_ = charset;
}
/**
 * Builds a textual summary of this request: simple class name followed by the URL, HTTP method,
 * encoding type, request parameters, additional headers and explicitly set credentials.
 * @return a string representation of this object
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder(getClass().getSimpleName());
    text.append("[<")
        .append("url=\"").append(url_).append('"')
        .append(", ").append(httpMethod_)
        .append(", ").append(encodingType_)
        .append(", ").append(requestParameters_)
        .append(", ").append(additionalHeaders_)
        .append(", ").append(credentials_)
        .append(">]");
    return text.toString();
}
}