All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.gargoylesoftware.htmlunit.WebRequest Maven / Gradle / Ivy

There is a newer version: 2.70.0
Show newest version
/*
 * Copyright (c) 2002-2015 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gargoylesoftware.htmlunit;

import java.io.Serializable;
import java.net.IDN;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;

import com.gargoylesoftware.htmlunit.util.NameValuePair;
import com.gargoylesoftware.htmlunit.util.UrlUtils;

/**
 * Parameter object for making web requests.
 *
 * @version $Revision: 10040 $
 * @author Brad Clarke
 * @author Hans Donner
 * @author Ahmed Ashour
 * @author Marc Guillemot
 * @author Rodney Gitzel
 * @author Ronald Brill
 */
public class WebRequest implements Serializable {

    // private static final Log LOG = LogFactory.getLog(WebRequest.class);
    private static final Pattern DOT_PATTERN = Pattern.compile("/\\./");
    private static final Pattern DOT_DOT_PATTERN = Pattern.compile("/(?!\\.\\.)[^/]*/\\.\\./");
    private static final Pattern REMOVE_DOTS_PATTERN = Pattern.compile("^/(\\.\\.?/)*");

    private String url_; // String instead of java.net.URL because "about:blank" URLs don't serialize correctly
    private String proxyHost_;
    private int proxyPort_;
    private boolean isSocksProxy_;
    private HttpMethod httpMethod_ = HttpMethod.GET;
    private FormEncodingType encodingType_ = FormEncodingType.URL_ENCODED;
    private Map additionalHeaders_ = new HashMap<>();
    private Credentials urlCredentials_;
    private Credentials credentials_;
    private String charset_ = TextUtil.DEFAULT_CHARSET;

    /* These two are mutually exclusive; additionally, requestBody_ should only be set for POST requests. */
    private List requestParameters_ = Collections.emptyList();
    private String requestBody_;

    /**
     * Instantiates a {@link WebRequest} for the specified URL.
     * @param url the target URL
     * @param acceptHeader the accept header to use
     */
    public WebRequest(final URL url, final String acceptHeader) {
        setUrl(url);
        setAdditionalHeader("Accept", acceptHeader);
        setAdditionalHeader("Accept-Encoding", "gzip, deflate");
    }

    /**
     * Instantiates a {@link WebRequest} for the specified URL.
     * @param url the target URL
     */
    public WebRequest(final URL url) {
        this(url, "*/*");
    }

    /**
     * Instantiates a {@link WebRequest} for the specified URL using the specified HTTP submit method.
     * @param url the target URL
     * @param submitMethod the HTTP submit method to use
     */
    public WebRequest(final URL url, final HttpMethod submitMethod) {
        this(url);
        setHttpMethod(submitMethod);
    }

    /**
     * Returns the target URL.
     * @return the target URL
     */
    public URL getUrl() {
        return UrlUtils.toUrlSafe(url_);
    }

    /**
     * Sets the target URL. The URL may be simplified if needed (for instance eliminating
     * irrelevant path portions like "/./").
     * @param url the target URL
     */
    public void setUrl(URL url) {
        if (url != null) {
            final String path = url.getPath();
            if (path.isEmpty()) {
                if (!url.getFile().isEmpty() || url.getProtocol().startsWith("http")) {
                    url = buildUrlWithNewFile(url, "/" + url.getFile());
                }
            }
            else if (path.contains("/.")) {
                final String query = (url.getQuery() != null) ? "?" + url.getQuery() : "";
                url = buildUrlWithNewFile(url, removeDots(path) + query);
            }
            final String idn = IDN.toASCII(url.getHost());
            if (!idn.equals(url.getHost())) {
                try {
                    url = new URL(url.getProtocol(), idn, url.getPort(), url.getFile());
                }
                catch (final Exception e) {
                    throw new RuntimeException("Cannot change hostname of URL: " + url.toExternalForm(), e);
                }
            }
            url_ = url.toExternalForm();

            // http://john.smith:secret@localhost
            final String userInfo = url.getUserInfo();
            if (userInfo != null) {
                final int splitPos = userInfo.indexOf(':');
                if (splitPos == -1) {
                    urlCredentials_ = new UsernamePasswordCredentials(userInfo, "");
                }
                else {
                    final String username = userInfo.substring(0, splitPos);
                    final String password = userInfo.substring(splitPos + 1);
                    urlCredentials_ = new UsernamePasswordCredentials(username, password);
                }
            }
        }
        else {
            url_ = null;
        }
    }

    /*
     * Strip a URL string of "/./" and "/../" occurrences.
     * 

* One trick here is to repeatedly create new matchers on a given * pattern, so that we can see whether it needs to be re-applied; * unfortunately .replaceAll() doesn't re-process its own output, * so if we create a new match with a replacement, it is missed. */ private String removeDots(final String path) { String newPath = path; // remove occurrences at the beginning newPath = REMOVE_DOTS_PATTERN.matcher(newPath).replaceAll("/"); if ("/..".equals(newPath)) { newPath = "/"; } // single dots have no effect, so just remove them while (DOT_PATTERN.matcher(newPath).find()) { newPath = DOT_PATTERN.matcher(newPath).replaceAll("/"); } // mid-path double dots should be removed WITH the previous subdirectory and replaced // with "/" BUT ONLY IF that subdirectory's not also ".." (a regex lookahead helps with this) while (DOT_DOT_PATTERN.matcher(newPath).find()) { newPath = DOT_DOT_PATTERN.matcher(newPath).replaceAll("/"); } return newPath; } private URL buildUrlWithNewFile(URL url, String newFile) { try { if (url.getRef() != null) { newFile += '#' + url.getRef(); } if ("file".equals(url.getProtocol()) && url.getAuthority() != null && url.getAuthority().endsWith(":")) { newFile = ":" + newFile; } url = new URL(url.getProtocol(), url.getHost(), url.getPort(), newFile); } catch (final Exception e) { throw new RuntimeException("Cannot set URL: " + url.toExternalForm(), e); } return url; } /** * Returns the proxy host to use. * @return the proxy host to use */ public String getProxyHost() { return proxyHost_; } /** * Sets the proxy host to use. * @param proxyHost the proxy host to use */ public void setProxyHost(final String proxyHost) { proxyHost_ = proxyHost; } /** * Returns the proxy port to use. * @return the proxy port to use */ public int getProxyPort() { return proxyPort_; } /** * Sets the proxy port to use. * @param proxyPort the proxy port to use */ public void setProxyPort(final int proxyPort) { proxyPort_ = proxyPort; } /** * Returns whether SOCKS proxy or not. * @return whether SOCKS proxy or not */ public boolean isSocksProxy() { return isSocksProxy_; } /** * Sets whether SOCKS proxy or not. * @param isSocksProxy whether SOCKS proxy or not */ public void setSocksProxy(final boolean isSocksProxy) { isSocksProxy_ = isSocksProxy; } /** * Returns the form encoding type to use. * @return the form encoding type to use */ public FormEncodingType getEncodingType() { return encodingType_; } /** * Sets the form encoding type to use. * @param encodingType the form encoding type to use */ public void setEncodingType(final FormEncodingType encodingType) { encodingType_ = encodingType; } /** * Retrieves the request parameters to use. If set, these request parameters will overwrite any * request parameters which may be present in the {@link #getUrl() URL}. Should not be used in * combination with the {@link #setRequestBody(String) request body}. * @return the request parameters to use */ public List getRequestParameters() { return requestParameters_; } /** * Sets the request parameters to use. If set, these request parameters will overwrite any request * parameters which may be present in the {@link #getUrl() URL}. Should not be used in combination * with the {@link #setRequestBody(String) request body}. * @param requestParameters the request parameters to use * @throws RuntimeException if the request body has already been set */ public void setRequestParameters(final List requestParameters) throws RuntimeException { if (requestBody_ != null) { final String msg = "Trying to set the request parameters, but the request body has already been specified;" + "the two are mutually exclusive!"; throw new RuntimeException(msg); } requestParameters_ = requestParameters; } /** * Returns the body content to be submitted if this is a POST request. Ignored for all other request * types. Should not be used in combination with {@link #setRequestParameters(List) request parameters}. * @return the body content to be submitted if this is a POST request */ public String getRequestBody() { return requestBody_; } /** * Sets the body content to be submitted if this is a POST or PUT request. * Ignored for all other request types. * Should not be used in combination with {@link #setRequestParameters(List) request parameters}. * @param requestBody the body content to be submitted if this is a POST request * @throws RuntimeException if the request parameters have already been set * or this is not a POST or PUT request */ public void setRequestBody(final String requestBody) throws RuntimeException { if (requestParameters_ != null && !requestParameters_.isEmpty()) { final String msg = "Trying to set the request body, but the request parameters have already been specified;" + "the two are mutually exclusive!"; throw new RuntimeException(msg); } if (httpMethod_ != HttpMethod.POST && httpMethod_ != HttpMethod.PUT) { final String msg = "The request body may only be set for POST or PUT requests!"; throw new RuntimeException(msg); } requestBody_ = requestBody; } /** * Returns the HTTP submit method to use. * @return the HTTP submit method to use */ public HttpMethod getHttpMethod() { return httpMethod_; } /** * Sets the HTTP submit method to use. * @param submitMethod the HTTP submit method to use */ public void setHttpMethod(final HttpMethod submitMethod) { httpMethod_ = submitMethod; } /** * Returns the additional HTTP headers to use. * @return the additional HTTP headers to use */ public Map getAdditionalHeaders() { return additionalHeaders_; } /** * Sets the additional HTTP headers to use. * @param additionalHeaders the additional HTTP headers to use */ public void setAdditionalHeaders(final Map additionalHeaders) { additionalHeaders_ = additionalHeaders; } /** * Returns whether the specified header name is already included in the additional HTTP headers. * @param name the name of the additional HTTP header * @return true if the specified header name is included in the additional HTTP headers */ public boolean isAdditionalHeader(final String name) { for (final String key : additionalHeaders_.keySet()) { if (name.equalsIgnoreCase(key)) { return true; } } return false; } /** * Sets the specified name/value pair in the additional HTTP headers. * @param name the name of the additional HTTP header * @param value the value of the additional HTTP header */ public void setAdditionalHeader(String name, final String value) { for (final String key : additionalHeaders_.keySet()) { if (name.equalsIgnoreCase(key)) { name = key; break; } } additionalHeaders_.put(name, value); } /** * Removed the specified name/value pair from the additional HTTP headers. * @param name the name of the additional HTTP header */ public void removeAdditionalHeader(String name) { for (final String key : additionalHeaders_.keySet()) { if (name.equalsIgnoreCase(key)) { name = key; break; } } additionalHeaders_.remove(name); } /** * Returns the credentials to use. * @return the credentials if set as part of the url */ public Credentials getUrlCredentials() { return urlCredentials_; } /** * Returns the credentials to use. * @return the credentials if set from the external builder */ public Credentials getCredentials() { return credentials_; } /** * Sets the credentials to use. * @param credentials the credentials to use */ public void setCredentials(final Credentials credentials) { credentials_ = credentials; } /** * Returns the character set to use to perform the request. * @return the character set to use to perform the request */ public String getCharset() { return charset_; } /** * Sets the character set to use to perform the request. The default value * is {@link TextUtil#DEFAULT_CHARSET}. * @param charset the character set to use to perform the request */ public void setCharset(final String charset) { charset_ = charset; } /** * Returns a string representation of this object. * @return a string representation of this object */ @Override public String toString() { final StringBuilder buffer = new StringBuilder(); buffer.append(getClass().getSimpleName()); buffer.append("[<"); buffer.append("url=\"" + url_ + '"'); buffer.append(", " + httpMethod_); buffer.append(", " + encodingType_); buffer.append(", " + requestParameters_); buffer.append(", " + additionalHeaders_); buffer.append(", " + credentials_); buffer.append(">]"); return buffer.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy