All Downloads are FREE. Search and download functionalities are using the official Maven repository.

okhttp3.HttpUrl Maven / Gradle / Ivy

There is a newer version: 5.0.0-alpha.14
Show newest version
/*
 * Copyright (C) 2015 Square, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package okhttp3;

import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import okhttp3.internal.Util;
import okhttp3.internal.publicsuffix.PublicSuffixDatabase;
import okio.Buffer;

import static okhttp3.internal.Util.decodeHexDigit;
import static okhttp3.internal.Util.delimiterOffset;
import static okhttp3.internal.Util.skipLeadingAsciiWhitespace;
import static okhttp3.internal.Util.skipTrailingAsciiWhitespace;
import static okhttp3.internal.Util.verifyAsIpAddress;

/**
 * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this
 * class to compose and decompose Internet addresses. For example, this code will compose and print
 * a URL for Google search: 
   {@code
 *
 *   HttpUrl url = new HttpUrl.Builder()
 *       .scheme("https")
 *       .host("www.google.com")
 *       .addPathSegment("search")
 *       .addQueryParameter("q", "polar bears")
 *       .build();
 *   System.out.println(url);
 * }
* * which prints:
   {@code
 *
 *     https://www.google.com/search?q=polar%20bears
 * }
* * As another example, this code prints the human-readable query parameters of a Twitter search: *
   {@code
 *
 *   HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");
 *   for (int i = 0, size = url.querySize(); i < size; i++) {
 *     System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));
 *   }
 * }
* * which prints:
   {@code
 *
 *   q: cute #puppies
 *   f: images
 * }
* * In addition to composing URLs from their component parts and decomposing URLs into their * component parts, this class implements relative URL resolution: what address you'd reach by * clicking a relative link on a specified page. For example:
   {@code
 *
 *   HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");
 *   HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");
 *   System.out.println(link);
 * }
* * which prints:
   {@code
 *
 *   https://www.youtube.com/watch?v=cbP2N1BQdYc
 * }
* *

What's in a URL?

* * A URL has several components. * *

Scheme

* *

 * Sometimes referred to as protocol, a URL's scheme describes what mechanism should be
 * used to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file},
 * {@code ftp}), this class only supports {@code http} and {@code https}. Use {@link URI
 * java.net.URI} for URLs with arbitrary schemes.
 *

Username and Password

* *

 * Username and password are either present, or the empty string {@code ""} if absent. This class
 * offers no mechanism to differentiate empty from absent. Neither of these components is popular
 * in practice. Typically HTTP applications use other mechanisms for user identification and
 * authentication.
 *

Host

* *

The host identifies the webserver that serves the URL's resource. It is either a hostname like * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6 * address like {@code ::1}. * *

Usually a webserver is reachable with multiple identifiers: its IP addresses, registered * domain names, and even {@code localhost} when connecting from the server itself. Each of a * webserver's names is a distinct URL and they are not interchangeable. For example, even if {@code * http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by the same * IP address, the two URLs identify different resources. * *

Port

* *

The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS. * This class never returns -1 for the port: if no port is explicitly specified in the URL then the * scheme's default is used. * *

Path

* *

The path identifies a specific resource on the host. Paths have a hierarchical structure like * "/square/okhttp/issues/1486" and decompose into a list of segments like ["square", "okhttp", * "issues", "1486"]. * *

This class offers methods to compose and decompose paths by segment. It composes each path * from a list of segments by alternating between "/" and the encoded segment. For example the * segments ["a", "b"] build "/a/b" and the segments ["a", "b", ""] build "/a/b/". * *

If a path's last segment is the empty string then the path ends with "/". This class always * builds non-empty paths: if the path is omitted it defaults to "/". The default path's segment * list is a single empty string: [""]. * *

Query

* *

The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query * string is subdivided into a collection of name-value parameters. This class offers methods to set * the query as the single string, or as individual name-value parameters. With name-value * parameters the values are optional and names may be repeated. * *

Fragment

* *

The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and * query the fragment is not sent to the webserver: it's private to the client. * *

Encoding

* *

Each component must be encoded before it is embedded in the complete URL. As we saw above, the * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query * parameter value. * *

Percent encoding

* *

Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes * (like {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters, * non-ASCII characters, and characters that already have another meaning in a particular context. * *

Percent encoding is used in every URL component except for the hostname. But the set of * characters that need to be encoded is different for each component. For example, the path * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the * start of the URL's query. But within the query and fragment components, the {@code ?} character * doesn't delimit anything and doesn't need to be escaped.

   {@code
 *
 *   HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()
 *       .addPathSegment("_Who?_")
 *       .query("_Who?_")
 *       .fragment("_Who?_")
 *       .build();
 *   System.out.println(url);
 * }
* * This prints:
   {@code
 *
 *   http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_
 * }
* * When parsing URLs that lack percent encoding where it is required, this class will percent encode * the offending characters. * *

IDNA Mapping and Punycode encoding

* *

Hostnames have different requirements and use a different encoding scheme. It consists of IDNA * mapping and Punycode encoding. * *

In order to avoid confusion and discourage phishing attacks, IDNA Mapping transforms names to avoid * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM} * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}. * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are * not mapped and cannot be used in a hostname. * *

Punycode converts a Unicode string to an ASCII * string to make international domain names work everywhere. For example, "σ" encodes as "xn--4xa". * The encoded string is not human readable, but can be used with classes like {@link InetAddress} * to establish connections. * *

Why another URL model?

* *

Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL * model to address problems that the others don't. * *

Different URLs should be different

* *

Although they have different content, {@code java.net.URL} considers the following two URLs * equal, and the {@link Object#equals equals()} method between them returns true: * *

    *
  • http://square.github.io/ *
  • http://google.github.io/ *
* * This is because those two hosts share the same IP address. This is an old, bad design decision * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are * hosted. * *

Equal URLs should be equal

* *

These two URLs are semantically identical, but {@code java.net.URI} disagrees: * *

    *
  • http://host:80/ *
  • http://host *
* * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /}) * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any * application that stores information-per-URL will need to either canonicalize manually, or suffer * unnecessary redundancy for such URLs. * *

Because they don't attempt canonical form, these classes are surprisingly difficult to use * securely. Suppose you're building a webservice that checks that incoming paths are prefixed * "/static/images/" before serving the corresponding assets from the filesystem.

   {@code
 *
 *   String attack = "http://example.com/static/images/../../../../../etc/passwd";
 *   System.out.println(new URL(attack).getPath());
 *   System.out.println(new URI(attack).getPath());
 *   System.out.println(HttpUrl.parse(attack).encodedPath());
 * }
 *
 * By not canonicalizing the input paths, {@code java.net.URL} and {@code java.net.URI} are
 * complicit in directory traversal attacks. Code that checks only the path prefix may suffer!
 * The three calls print:
 *
   {@code
 *
 *    /static/images/../../../../../etc/passwd
 *    /static/images/../../../../../etc/passwd
 *    /etc/passwd
 * }
* *

If it works on the web, it should work in your application

* *

 * The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like
 * "http://example.com/abc|def" because the '|' character is unsupported. This class is more
 * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef".
 * This kind of behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with
 * major web browsers over consistency with obsolete specifications.
 *

Paths and Queries should decompose

* *

 * Neither of the built-in URL models offers direct access to path segments or query parameters.
 * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+'
 * characters get silently replaced with spaces? If a query parameter contains a '&', does that
 * get escaped? By offering methods to read and write individual query parameters directly,
 * application developers are saved from the hassles of encoding and decoding.
 *

Plus a modern API

* *

The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping * constructors. For example, there's no API to compose a URI with a custom port without also * providing a query and fragment. * *

Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and * path but no hostname. Building APIs that consume such malformed values is difficult! * *

This class has a modern API. It avoids punitive checked exceptions: {@link #get get()} * throws {@link IllegalArgumentException} on invalid input or {@link #parse parse()} * returns null if the input is an invalid URL. You can even be explicit about whether each * component has been encoded already. */ public final class HttpUrl { private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"; static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"; static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#"; static final String PATH_SEGMENT_ENCODE_SET_URI = "[]"; static final String QUERY_ENCODE_SET = " \"'<>#"; static final String QUERY_COMPONENT_REENCODE_SET = " \"'<>#&="; static final String QUERY_COMPONENT_ENCODE_SET = " !\"#$&'(),/:;<=>?@[]\\^`{|}~"; static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}"; static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~"; static final String FRAGMENT_ENCODE_SET = ""; static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}"; /** Either "http" or "https". */ final String scheme; /** Decoded username. */ private final String username; /** Decoded password. */ private final String password; /** Canonical hostname. */ final String host; /** Either 80, 443 or a user-specified port. In range [1..65535]. */ final int port; /** * A list of canonical path segments. This list always contains at least one element, which may be * the empty string. Each segment is formatted with a leading '/', so if path segments were ["a", * "b", ""], then the encoded path would be "/a/b/". */ private final List pathSegments; /** * Alternating, decoded query names and values, or null for no query. Names may be empty or * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or * empty, or non-empty. 
*/ private final @Nullable List queryNamesAndValues; /** Decoded fragment. */ private final @Nullable String fragment; /** Canonical URL. */ private final String url; HttpUrl(Builder builder) { this.scheme = builder.scheme; this.username = percentDecode(builder.encodedUsername, false); this.password = percentDecode(builder.encodedPassword, false); this.host = builder.host; this.port = builder.effectivePort(); this.pathSegments = percentDecode(builder.encodedPathSegments, false); this.queryNamesAndValues = builder.encodedQueryNamesAndValues != null ? percentDecode(builder.encodedQueryNamesAndValues, true) : null; this.fragment = builder.encodedFragment != null ? percentDecode(builder.encodedFragment, false) : null; this.url = builder.toString(); } /** Returns this URL as a {@link URL java.net.URL}. */ public URL url() { try { return new URL(url); } catch (MalformedURLException e) { throw new RuntimeException(e); // Unexpected! } } /** * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this * class, the returned URI may be semantically different from this URL: * *

    *
  • Characters forbidden by URI like {@code [} and {@code |} will be escaped. *
  • Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}. *
  • Whitespace and control characters in the fragment will be stripped. *
* *

These differences may have a significant consequence when the URI is interpreted by a * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided. */ public URI uri() { String uri = newBuilder().reencodeForUri().toString(); try { return new URI(uri); } catch (URISyntaxException e) { // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry. try { String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", ""); return URI.create(stripped); } catch (Exception e1) { throw new RuntimeException(e); // Unexpected! } } } /** Returns either "http" or "https". */ public String scheme() { return scheme; } public boolean isHttps() { return scheme.equals("https"); } /** * Returns the username, or an empty string if none is set. * *

* * * * * *
URL{@code encodedUsername()}
{@code http://host/}{@code ""}
{@code http://username@host/}{@code "username"}
{@code http://username:password@host/}{@code "username"}
{@code http://a%20b:c%20d@host/}{@code "a%20b"}
*/ public String encodedUsername() { if (username.isEmpty()) return ""; int usernameStart = scheme.length() + 3; // "://".length() == 3. int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@"); return url.substring(usernameStart, usernameEnd); } /** * Returns the decoded username, or an empty string if none is present. * *

* * * * * *
URL{@code username()}
{@code http://host/}{@code ""}
{@code http://username@host/}{@code "username"}
{@code http://username:password@host/}{@code "username"}
{@code http://a%20b:c%20d@host/}{@code "a b"}
*/
  public String username() {
    return this.username;
  }

  /**
   * Returns the password, or an empty string if none is set.
   *

* * * * * *
URL{@code encodedPassword()}
{@code http://host/}{@code ""}
{@code http://username@host/}{@code ""}
{@code http://username:password@host/}{@code "password"}
{@code http://a%20b:c%20d@host/}{@code "c%20d"}
*/
  public String encodedPassword() {
    if (password.isEmpty()) {
      return "";
    }
    // Search for the username/password ':' only after "scheme://" so the scheme's own ':' is
    // skipped. The password then runs up to the first '@' in the URL.
    final int afterScheme = scheme.length() + 3; // "://".length() == 3.
    final int begin = url.indexOf(':', afterScheme) + 1;
    final int end = url.indexOf('@');
    return url.substring(begin, end);
  }

  /**
   * Returns the decoded password, or an empty string if none is present.
   *

* * * * * *
URL{@code password()}
{@code http://host/}{@code ""}
{@code http://username@host/}{@code ""}
{@code http://username:password@host/}{@code "password"}
{@code http://a%20b:c%20d@host/}{@code "c d"}
*/
  public String password() {
    return this.password;
  }

  /**
   * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May
   * be:
   *
    *
  • A regular host name, like {@code android.com}. *
  • An IPv4 address, like {@code 127.0.0.1}. *
  • An IPv6 address, like {@code ::1}. Note that there are no square braces. *
  • An encoded IDN, like {@code xn--n3h.net}. *
* *

* * * * * *
URL{@code host()}
{@code http://android.com/}{@code "android.com"}
{@code http://127.0.0.1/}{@code "127.0.0.1"}
{@code http://[::1]/}{@code "::1"}
{@code http://xn--n3h.net/}{@code "xn--n3h.net"}
*/
  public String host() {
    return this.host;
  }

  /**
   * Returns the explicitly-specified port if one was provided, or the default port for this URL's
   * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code
   * https://square.com/}. The result is in {@code [1..65535]}.
   *

* * * * *
URL{@code port()}
{@code http://host/}{@code 80}
{@code http://host:8000/}{@code 8000}
{@code https://host/}{@code 443}
*/
  public int port() {
    return this.port;
  }

  /**
   * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1
   * otherwise.
   */
  public static int defaultPort(String scheme) {
    switch (scheme) {
      case "http":
        return 80;
      case "https":
        return 443;
      default:
        return -1;
    }
  }

  /**
   * Returns the number of segments in this URL's path. This is also the number of slashes in the
   * URL's path, like 3 in {@code http://host/a/b/c}. This is always at least 1.
   *

* * * * *
URL{@code pathSize()}
{@code http://host/}{@code 1}
{@code http://host/a/b/c}{@code 3}
{@code http://host/a/b/c/}{@code 4}
*/
  public int pathSize() {
    final int segmentCount = pathSegments.size();
    return segmentCount;
  }

  /**
   * Returns the entire path of this URL encoded for use in HTTP resource resolution. The returned
   * path will start with {@code "/"}.
   *

* * * * *
URL{@code encodedPath()}
{@code http://host/}{@code "/"}
{@code http://host/a/b/c}{@code "/a/b/c"}
{@code http://host/a/b%20c/d}{@code "/a/b%20c/d"}
*/ public String encodedPath() { int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3. int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#"); return url.substring(pathStart, pathEnd); } static void pathSegmentsToString(StringBuilder out, List pathSegments) { for (int i = 0, size = pathSegments.size(); i < size; i++) { out.append('/'); out.append(pathSegments.get(i)); } } /** * Returns a list of encoded path segments like {@code ["a", "b", "c"]} for the URL {@code * http://host/a/b/c}. This list is never empty though it may contain a single empty string. * *

* * * * *
URL{@code encodedPathSegments()}
{@code http://host/}{@code [""]}
{@code http://host/a/b/c}{@code ["a", "b", "c"]}
{@code http://host/a/b%20c/d}{@code ["a", "b%20c", "d"]}
*/ public List encodedPathSegments() { int pathStart = url.indexOf('/', scheme.length() + 3); int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#"); List result = new ArrayList<>(); for (int i = pathStart; i < pathEnd; ) { i++; // Skip the '/'. int segmentEnd = delimiterOffset(url, i, pathEnd, '/'); result.add(url.substring(i, segmentEnd)); i = segmentEnd; } return result; } /** * Returns a list of path segments like {@code ["a", "b", "c"]} for the URL {@code * http://host/a/b/c}. This list is never empty though it may contain a single empty string. * *

* * * * *
URL{@code pathSegments()}
{@code http://host/}{@code [""]}
{@code http://host/a/b/c}{@code ["a", "b", "c"]}
{@code http://host/a/b%20c/d}{@code ["a", "b c", "d"]}
*/
  public List<String> pathSegments() {
    return pathSegments;
  }

  /**
   * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string
   * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all
   * other URLs).
   *

* * * * * * *
URL{@code encodedQuery()}
{@code http://host/}null
{@code http://host/?}{@code ""}
{@code http://host/?a=apple&k=key+lime}{@code * "a=apple&k=key+lime"}
{@code http://host/?a=apple&a=apricot}{@code "a=apple&a=apricot"}
{@code http://host/?a=apple&b}{@code "a=apple&b"}
*/ public @Nullable String encodedQuery() { if (queryNamesAndValues == null) return null; // No query. int queryStart = url.indexOf('?') + 1; int queryEnd = delimiterOffset(url, queryStart, url.length(), '#'); return url.substring(queryStart, queryEnd); } static void namesAndValuesToQueryString(StringBuilder out, List namesAndValues) { for (int i = 0, size = namesAndValues.size(); i < size; i += 2) { String name = namesAndValues.get(i); String value = namesAndValues.get(i + 1); if (i > 0) out.append('&'); out.append(name); if (value != null) { out.append('='); out.append(value); } } } /** * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a query * string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject", "math", * "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain '=' * characters. */ static List queryStringToNamesAndValues(String encodedQuery) { List result = new ArrayList<>(); for (int pos = 0; pos <= encodedQuery.length(); ) { int ampersandOffset = encodedQuery.indexOf('&', pos); if (ampersandOffset == -1) ampersandOffset = encodedQuery.length(); int equalsOffset = encodedQuery.indexOf('=', pos); if (equalsOffset == -1 || equalsOffset > ampersandOffset) { result.add(encodedQuery.substring(pos, ampersandOffset)); result.add(null); // No value for this name. } else { result.add(encodedQuery.substring(pos, equalsOffset)); result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset)); } pos = ampersandOffset + 1; } return result; } /** * Returns this URL's query, like {@code "abc"} for {@code http://host/?abc}. Most callers should * prefer {@link #queryParameterName} and {@link #queryParameterValue} because these methods offer * direct access to individual query parameters. * *

* * * * * * *
URL{@code query()}
{@code http://host/}null
{@code http://host/?}{@code ""}
{@code http://host/?a=apple&k=key+lime}{@code "a=apple&k=key * lime"}
{@code http://host/?a=apple&a=apricot}{@code "a=apple&a=apricot"}
{@code http://host/?a=apple&b}{@code "a=apple&b"}
*/ public @Nullable String query() { if (queryNamesAndValues == null) return null; // No query. StringBuilder result = new StringBuilder(); namesAndValuesToQueryString(result, queryNamesAndValues); return result.toString(); } /** * Returns the number of query parameters in this URL, like 2 for {@code * http://host/?a=apple&b=banana}. If this URL has no query this returns 0. Otherwise it returns * one more than the number of {@code "&"} separators in the query. * *

* * * * * * *
URL{@code querySize()}
{@code http://host/}{@code 0}
{@code http://host/?}{@code 1}
{@code http://host/?a=apple&k=key+lime}{@code 2}
{@code http://host/?a=apple&a=apricot}{@code 2}
{@code http://host/?a=apple&b}{@code 2}
*/
  public int querySize() {
    if (queryNamesAndValues == null) {
      return 0;
    }
    // The list alternates names and values, so each parameter occupies two slots.
    return queryNamesAndValues.size() / 2;
  }

  /**
   * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is
   * no such query parameter.
   *

* * * * * * *
URL{@code queryParameter("a")}
{@code http://host/}null
{@code http://host/?}null
{@code http://host/?a=apple&k=key+lime}{@code "apple"}
{@code http://host/?a=apple&a=apricot}{@code "apple"}
{@code http://host/?a=apple&b}{@code "apple"}
*/
  public @Nullable String queryParameter(String name) {
    // Read through a typed local so elements come back as String.
    final List<String> namesAndValues = queryNamesAndValues;
    if (namesAndValues == null) return null;
    // Names sit at even indices and each value directly follows its name.
    for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
      if (name.equals(namesAndValues.get(i))) {
        return namesAndValues.get(i + 1);
      }
    }
    return null;
  }

  /**
   * Returns the distinct query parameter names in this URL, like {@code ["a", "b"]} for {@code
   * http://host/?a=apple&b=banana}. If this URL has no query this returns the empty set.
   *

* * * * * * *
URL{@code queryParameterNames()}
{@code http://host/}{@code []}
{@code http://host/?}{@code [""]}
{@code http://host/?a=apple&k=key+lime}{@code ["a", "k"]}
{@code http://host/?a=apple&a=apricot}{@code ["a"]}
{@code http://host/?a=apple&b}{@code ["a", "b"]}
*/
  public Set<String> queryParameterNames() {
    // Read through a typed local so elements come back as String.
    final List<String> namesAndValues = queryNamesAndValues;
    if (namesAndValues == null) return Collections.emptySet();
    // LinkedHashSet drops repeated names while keeping first-appearance order.
    Set<String> result = new LinkedHashSet<>();
    for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
      result.add(namesAndValues.get(i));
    }
    return Collections.unmodifiableSet(result);
  }

  /**
   * Returns all values for the query parameter {@code name} ordered by their appearance in this
   * URL. For example this returns {@code ["banana"]} for {@code queryParameterValues("b")} on
   * {@code http://host/?a=apple&b=banana}.
   *

* * * * * * *
URL{@code queryParameterValues("a")}{@code * queryParameterValues("b")}
{@code http://host/}{@code []}{@code []}
{@code http://host/?}{@code []}{@code []}
{@code http://host/?a=apple&k=key+lime}{@code ["apple"]}{@code * []}
{@code http://host/?a=apple&a=apricot}{@code ["apple", * "apricot"]}{@code []}
{@code http://host/?a=apple&b}{@code ["apple"]}{@code * [null]}
*/
  public List<String> queryParameterValues(String name) {
    // Read through a typed local so elements come back as String.
    final List<String> namesAndValues = queryNamesAndValues;
    if (namesAndValues == null) return Collections.emptyList();
    List<String> result = new ArrayList<>();
    for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
      if (name.equals(namesAndValues.get(i))) {
        result.add(namesAndValues.get(i + 1)); // May be null when the name has no '='.
      }
    }
    return Collections.unmodifiableList(result);
  }

  /**
   * Returns the name of the query parameter at {@code index}. For example this returns {@code "a"}
   * for {@code queryParameterName(0)} on {@code http://host/?a=apple&b=banana}. This throws if
   * {@code index} is not less than the {@linkplain #querySize query size}.
   *

* * * * * * *
URL{@code queryParameterName(0)}{@code * queryParameterName(1)}
{@code http://host/}exceptionexception
{@code http://host/?}{@code ""}exception
{@code http://host/?a=apple&k=key+lime}{@code "a"}{@code * "k"}
{@code http://host/?a=apple&a=apricot}{@code "a"}{@code * "a"}
{@code http://host/?a=apple&b}{@code "a"}{@code "b"}
*/
  public String queryParameterName(int index) {
    // Read through a typed local so elements come back as String.
    final List<String> namesAndValues = queryNamesAndValues;
    if (namesAndValues == null) throw new IndexOutOfBoundsException();
    return namesAndValues.get(index * 2); // Names occupy the even slots.
  }

  /**
   * Returns the value of the query parameter at {@code index}. For example this returns {@code
   * "apple"} for {@code queryParameterValue(0)} on {@code http://host/?a=apple&b=banana}. This
   * throws if {@code index} is not less than the {@linkplain #querySize query size}.
   *

* * * * * * *
URL{@code queryParameterValue(0)}{@code * queryParameterValue(1)}
{@code http://host/}exceptionexception
{@code http://host/?}nullexception
{@code http://host/?a=apple&k=key+lime}{@code "apple"}{@code * "key lime"}
{@code http://host/?a=apple&a=apricot}{@code "apple"}{@code * "apricot"}
{@code http://host/?a=apple&b}{@code "apple"}null
*/
  public String queryParameterValue(int index) {
    // Read through a typed local so elements come back as String.
    final List<String> namesAndValues = queryNamesAndValues;
    if (namesAndValues == null) throw new IndexOutOfBoundsException();
    // Values occupy the odd slots. The result is null when the name has no '=' separator.
    return namesAndValues.get(index * 2 + 1);
  }

  /**
   * Returns this URL's encoded fragment, like {@code "abc"} for {@code http://host/#abc}. This
   * returns null if the URL has no fragment.
   *

* * * * * *
URL{@code encodedFragment()}
{@code http://host/}null
{@code http://host/#}{@code ""}
{@code http://host/#abc}{@code "abc"}
{@code http://host/#abc|def}{@code "abc|def"}
*/
  public @Nullable String encodedFragment() {
    if (fragment == null) {
      return null;
    }
    // Everything after the first '#' in the canonical URL is the fragment.
    return url.substring(url.indexOf('#') + 1);
  }

  /**
   * Returns this URL's fragment, like {@code "abc"} for {@code http://host/#abc}. This returns null
   * if the URL has no fragment.
   *

* * * * * *
URL{@code fragment()}
{@code http://host/}null
{@code http://host/#}{@code ""}
{@code http://host/#abc}{@code "abc"}
{@code http://host/#abc|def}{@code "abc|def"}
*/ public @Nullable String fragment() { return fragment; } /** * Returns a string with containing this URL with its username, password, query, and fragment * stripped, and its path replaced with {@code /...}. For example, redacting {@code * http://username:[email protected]/path} returns {@code http://example.com/...}. */ public String redact() { return newBuilder("/...") .username("") .password("") .build() .toString(); } /** * Returns the URL that would be retrieved by following {@code link} from this URL, or null if * the resulting URL is not well-formed. */ public @Nullable HttpUrl resolve(String link) { Builder builder = newBuilder(link); return builder != null ? builder.build() : null; } public Builder newBuilder() { Builder result = new Builder(); result.scheme = scheme; result.encodedUsername = encodedUsername(); result.encodedPassword = encodedPassword(); result.host = host; // If we're set to a default port, unset it in case of a scheme change. result.port = port != defaultPort(scheme) ? port : -1; result.encodedPathSegments.clear(); result.encodedPathSegments.addAll(encodedPathSegments()); result.encodedQuery(encodedQuery()); result.encodedFragment = encodedFragment(); return result; } /** * Returns a builder for the URL that would be retrieved by following {@code link} from this URL, * or null if the resulting URL is not well-formed. */ public @Nullable Builder newBuilder(String link) { try { return new Builder().parse(this, link); } catch (IllegalArgumentException ignored) { return null; } } /** * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS * URL, or null if it isn't. */ public static @Nullable HttpUrl parse(String url) { try { return get(url); } catch (IllegalArgumentException ignored) { return null; } } /** * Returns a new {@code HttpUrl} representing {@code url}. * * @throws IllegalArgumentException If {@code url} is not a well-formed HTTP or HTTPS URL. 
*/
  public static HttpUrl get(String url) {
    final Builder parsed = new Builder().parse(null, url);
    return parsed.build();
  }

  /**
   * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or
   * null if it has any other protocol.
   */
  public static @Nullable HttpUrl get(URL url) {
    return parse(url.toString());
  }

  /** Returns an {@link HttpUrl} parsed from {@code uri.toString()}, or null if parsing fails. */
  public static @Nullable HttpUrl get(URI uri) {
    return parse(uri.toString());
  }

  /** Two instances are equal exactly when their canonical URL strings are equal. */
  @Override public boolean equals(@Nullable Object other) {
    if (!(other instanceof HttpUrl)) {
      return false;
    }
    final HttpUrl that = (HttpUrl) other;
    return that.url.equals(url);
  }

  @Override public int hashCode() {
    return url.hashCode();
  }

  /** Returns the canonical URL string. */
  @Override public String toString() {
    return url;
  }

  /**
   * Returns the domain name of this URL's {@link #host()} that is one level beneath the public
   * suffix by consulting the public suffix list. Returns
   * null if this URL's {@link #host()} is an IP address or is considered a public suffix by the
   * public suffix list.
   *

   * <p>In general this method should not be used to test whether a domain is valid
   * or routable. Instead, DNS is the recommended source for that information.
   *

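   * <table>
   *   <tr><th>URL</th><th>{@code topPrivateDomain()}</th></tr>
   *   <tr><td>{@code http://google.com}</td><td>{@code "google.com"}</td></tr>
   *   <tr><td>{@code http://adwords.google.co.uk}</td><td>{@code "google.co.uk"}</td></tr>
   *   <tr><td>{@code http://square}</td><td>null</td></tr>
   *   <tr><td>{@code http://co.uk}</td><td>null</td></tr>
   *   <tr><td>{@code http://localhost}</td><td>null</td></tr>
   *   <tr><td>{@code http://127.0.0.1}</td><td>null</td></tr>
   * </table>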
*/ public @Nullable String topPrivateDomain() { if (verifyAsIpAddress(host)) return null; return PublicSuffixDatabase.get().getEffectiveTldPlusOne(host); } public static final class Builder { @Nullable String scheme; String encodedUsername = ""; String encodedPassword = ""; @Nullable String host; int port = -1; final List encodedPathSegments = new ArrayList<>(); @Nullable List encodedQueryNamesAndValues; @Nullable String encodedFragment; public Builder() { encodedPathSegments.add(""); // The default path is '/' which needs a trailing space. } public Builder scheme(String scheme) { if (scheme == null) { throw new NullPointerException("scheme == null"); } else if (scheme.equalsIgnoreCase("http")) { this.scheme = "http"; } else if (scheme.equalsIgnoreCase("https")) { this.scheme = "https"; } else { throw new IllegalArgumentException("unexpected scheme: " + scheme); } return this; } public Builder username(String username) { if (username == null) throw new NullPointerException("username == null"); this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true); return this; } public Builder encodedUsername(String encodedUsername) { if (encodedUsername == null) throw new NullPointerException("encodedUsername == null"); this.encodedUsername = canonicalize( encodedUsername, USERNAME_ENCODE_SET, true, false, false, true); return this; } public Builder password(String password) { if (password == null) throw new NullPointerException("password == null"); this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true); return this; } public Builder encodedPassword(String encodedPassword) { if (encodedPassword == null) throw new NullPointerException("encodedPassword == null"); this.encodedPassword = canonicalize( encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true); return this; } /** * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6 * address. 
*/
    public Builder host(String host) {
      if (host == null) {
        throw new NullPointerException("host == null");
      }
      String canonicalHost = canonicalizeHost(host, 0, host.length());
      if (canonicalHost == null) {
        throw new IllegalArgumentException("unexpected host: " + host);
      }
      this.host = canonicalHost;
      return this;
    }

    public Builder port(int port) {
      boolean inRange = port > 0 && port <= 65535;
      if (!inRange) {
        throw new IllegalArgumentException("unexpected port: " + port);
      }
      this.port = port;
      return this;
    }

    int effectivePort() {
      if (port != -1) {
        return port;
      }
      return defaultPort(scheme);
    }

    public Builder addPathSegment(String pathSegment) {
      if (pathSegment == null) {
        throw new NullPointerException("pathSegment == null");
      }
      int end = pathSegment.length();
      push(pathSegment, 0, end, false, false);
      return this;
    }

    /**
     * Appends several path segments at once. Segments are separated by a slash (either {@code \}
     * or {@code /}). If {@code pathSegments} starts with a slash, the resulting URL will have an
     * empty path segment.
     */
    public Builder addPathSegments(String pathSegments) {
      if (pathSegments == null) {
        throw new NullPointerException("pathSegments == null");
      }
      final boolean alreadyEncoded = false;
      return addPathSegments(pathSegments, alreadyEncoded);
    }

    public Builder addEncodedPathSegment(String encodedPathSegment) {
      if (encodedPathSegment == null) {
        throw new NullPointerException("encodedPathSegment == null");
      }
      int end = encodedPathSegment.length();
      push(encodedPathSegment, 0, end, false, true);
      return this;
    }

    /**
     * Adds a set of encoded path segments separated by a slash (either {@code \} or {@code /}). If
     * {@code encodedPathSegments} starts with a slash, the resulting URL will have empty path
     * segment.
*/ public Builder addEncodedPathSegments(String encodedPathSegments) { if (encodedPathSegments == null) { throw new NullPointerException("encodedPathSegments == null"); } return addPathSegments(encodedPathSegments, true); } private Builder addPathSegments(String pathSegments, boolean alreadyEncoded) { int offset = 0; do { int segmentEnd = delimiterOffset(pathSegments, offset, pathSegments.length(), "/\\"); boolean addTrailingSlash = segmentEnd < pathSegments.length(); push(pathSegments, offset, segmentEnd, addTrailingSlash, alreadyEncoded); offset = segmentEnd + 1; } while (offset <= pathSegments.length()); return this; } public Builder setPathSegment(int index, String pathSegment) { if (pathSegment == null) throw new NullPointerException("pathSegment == null"); String canonicalPathSegment = canonicalize( pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true, null); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + pathSegment); } encodedPathSegments.set(index, canonicalPathSegment); return this; } public Builder setEncodedPathSegment(int index, String encodedPathSegment) { if (encodedPathSegment == null) { throw new NullPointerException("encodedPathSegment == null"); } String canonicalPathSegment = canonicalize(encodedPathSegment, 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true, null); encodedPathSegments.set(index, canonicalPathSegment); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment); } return this; } public Builder removePathSegment(int index) { encodedPathSegments.remove(index); if (encodedPathSegments.isEmpty()) { encodedPathSegments.add(""); // Always leave at least one '/'. 
} return this; } public Builder encodedPath(String encodedPath) { if (encodedPath == null) throw new NullPointerException("encodedPath == null"); if (!encodedPath.startsWith("/")) { throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath); } resolvePath(encodedPath, 0, encodedPath.length()); return this; } public Builder query(@Nullable String query) { this.encodedQueryNamesAndValues = query != null ? queryStringToNamesAndValues(canonicalize( query, QUERY_ENCODE_SET, false, false, true, true)) : null; return this; } public Builder encodedQuery(@Nullable String encodedQuery) { this.encodedQueryNamesAndValues = encodedQuery != null ? queryStringToNamesAndValues( canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true)) : null; return this; } /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */ public Builder addQueryParameter(String name, @Nullable String value) { if (name == null) throw new NullPointerException("name == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)); encodedQueryNamesAndValues.add(value != null ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true) : null); return this; } /** Adds the pre-encoded query parameter to this URL's query string. */ public Builder addEncodedQueryParameter(String encodedName, @Nullable String encodedValue) { if (encodedName == null) throw new NullPointerException("encodedName == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( canonicalize(encodedName, QUERY_COMPONENT_REENCODE_SET, true, false, true, true)); encodedQueryNamesAndValues.add(encodedValue != null ? 
canonicalize(encodedValue, QUERY_COMPONENT_REENCODE_SET, true, false, true, true) : null); return this; } public Builder setQueryParameter(String name, @Nullable String value) { removeAllQueryParameters(name); addQueryParameter(name, value); return this; } public Builder setEncodedQueryParameter(String encodedName, @Nullable String encodedValue) { removeAllEncodedQueryParameters(encodedName); addEncodedQueryParameter(encodedName, encodedValue); return this; } public Builder removeAllQueryParameters(String name) { if (name == null) throw new NullPointerException("name == null"); if (encodedQueryNamesAndValues == null) return this; String nameToRemove = canonicalize( name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true); removeAllCanonicalQueryParameters(nameToRemove); return this; } public Builder removeAllEncodedQueryParameters(String encodedName) { if (encodedName == null) throw new NullPointerException("encodedName == null"); if (encodedQueryNamesAndValues == null) return this; removeAllCanonicalQueryParameters( canonicalize(encodedName, QUERY_COMPONENT_REENCODE_SET, true, false, true, true)); return this; } private void removeAllCanonicalQueryParameters(String canonicalName) { for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) { if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) { encodedQueryNamesAndValues.remove(i + 1); encodedQueryNamesAndValues.remove(i); if (encodedQueryNamesAndValues.isEmpty()) { encodedQueryNamesAndValues = null; return; } } } } public Builder fragment(@Nullable String fragment) { this.encodedFragment = fragment != null ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false) : null; return this; } public Builder encodedFragment(@Nullable String encodedFragment) { this.encodedFragment = encodedFragment != null ? 
canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false) : null; return this; } /** * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is * particularly strict for certain components. */ Builder reencodeForUri() { for (int i = 0, size = encodedPathSegments.size(); i < size; i++) { String pathSegment = encodedPathSegments.get(i); encodedPathSegments.set(i, canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true)); } if (encodedQueryNamesAndValues != null) { for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) { String component = encodedQueryNamesAndValues.get(i); if (component != null) { encodedQueryNamesAndValues.set(i, canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true)); } } } if (encodedFragment != null) { encodedFragment = canonicalize( encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false); } return this; } public HttpUrl build() { if (scheme == null) throw new IllegalStateException("scheme == null"); if (host == null) throw new IllegalStateException("host == null"); return new HttpUrl(this); } @Override public String toString() { StringBuilder result = new StringBuilder(); if (scheme != null) { result.append(scheme); result.append("://"); } else { result.append("//"); } if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) { result.append(encodedUsername); if (!encodedPassword.isEmpty()) { result.append(':'); result.append(encodedPassword); } result.append('@'); } if (host != null) { if (host.indexOf(':') != -1) { // Host is an IPv6 address. 
result.append('['); result.append(host); result.append(']'); } else { result.append(host); } } if (port != -1 || scheme != null) { int effectivePort = effectivePort(); if (scheme == null || effectivePort != defaultPort(scheme)) { result.append(':'); result.append(effectivePort); } } pathSegmentsToString(result, encodedPathSegments); if (encodedQueryNamesAndValues != null) { result.append('?'); namesAndValuesToQueryString(result, encodedQueryNamesAndValues); } if (encodedFragment != null) { result.append('#'); result.append(encodedFragment); } return result.toString(); } static final String INVALID_HOST = "Invalid URL host"; Builder parse(@Nullable HttpUrl base, String input) { int pos = skipLeadingAsciiWhitespace(input, 0, input.length()); int limit = skipTrailingAsciiWhitespace(input, pos, input.length()); // Scheme. int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit); if (schemeDelimiterOffset != -1) { if (input.regionMatches(true, pos, "https:", 0, 6)) { this.scheme = "https"; pos += "https:".length(); } else if (input.regionMatches(true, pos, "http:", 0, 5)) { this.scheme = "http"; pos += "http:".length(); } else { throw new IllegalArgumentException("Expected URL scheme 'http' or 'https' but was '" + input.substring(0, schemeDelimiterOffset) + "'"); } } else if (base != null) { this.scheme = base.scheme; } else { throw new IllegalArgumentException( "Expected URL scheme 'http' or 'https' but no colon was found"); } // Authority. boolean hasUsername = false; boolean hasPassword = false; int slashCount = slashCount(input, pos, limit); if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) { // Read an authority if either: // * The input starts with 2 or more slashes. These follow the scheme if it exists. // * The input scheme exists and is different from the base URL's scheme. // // The structure of an authority is: // username:password@host:port // // Username, password and port are optional. 
// [username[:password]@]host[:port] pos += slashCount; authority: while (true) { int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#"); int c = componentDelimiterOffset != limit ? input.charAt(componentDelimiterOffset) : -1; switch (c) { case '@': // User info precedes. if (!hasPassword) { int passwordColonOffset = delimiterOffset( input, pos, componentDelimiterOffset, ':'); String canonicalUsername = canonicalize( input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true, null); this.encodedUsername = hasUsername ? this.encodedUsername + "%40" + canonicalUsername : canonicalUsername; if (passwordColonOffset != componentDelimiterOffset) { hasPassword = true; this.encodedPassword = canonicalize(input, passwordColonOffset + 1, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true, null); } hasUsername = true; } else { this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true, null); } pos = componentDelimiterOffset + 1; break; case -1: case '/': case '\\': case '?': case '#': // Host info precedes. int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset); if (portColonOffset + 1 < componentDelimiterOffset) { host = canonicalizeHost(input, pos, portColonOffset); port = parsePort(input, portColonOffset + 1, componentDelimiterOffset); if (port == -1) { throw new IllegalArgumentException("Invalid URL port: \"" + input.substring(portColonOffset + 1, componentDelimiterOffset) + '"'); } } else { host = canonicalizeHost(input, pos, portColonOffset); port = defaultPort(scheme); } if (host == null) { throw new IllegalArgumentException( INVALID_HOST + ": \"" + input.substring(pos, portColonOffset) + '"'); } pos = componentDelimiterOffset; break authority; } } } else { // This is a relative link. Copy over all authority components. Also maybe the path & query. 
this.encodedUsername = base.encodedUsername(); this.encodedPassword = base.encodedPassword(); this.host = base.host; this.port = base.port; this.encodedPathSegments.clear(); this.encodedPathSegments.addAll(base.encodedPathSegments()); if (pos == limit || input.charAt(pos) == '#') { encodedQuery(base.encodedQuery()); } } // Resolve the relative path. int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#"); resolvePath(input, pos, pathDelimiterOffset); pos = pathDelimiterOffset; // Query. if (pos < limit && input.charAt(pos) == '?') { int queryDelimiterOffset = delimiterOffset(input, pos, limit, '#'); this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize( input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true, null)); pos = queryDelimiterOffset; } // Fragment. if (pos < limit && input.charAt(pos) == '#') { this.encodedFragment = canonicalize( input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false, null); } return this; } private void resolvePath(String input, int pos, int limit) { // Read a delimiter. if (pos == limit) { // Empty path: keep the base path as-is. return; } char c = input.charAt(pos); if (c == '/' || c == '\\') { // Absolute path: reset to the default "/". encodedPathSegments.clear(); encodedPathSegments.add(""); pos++; } else { // Relative path: clear everything after the last '/'. encodedPathSegments.set(encodedPathSegments.size() - 1, ""); } // Read path segments. for (int i = pos; i < limit; ) { int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\"); boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit; push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true); i = pathSegmentDelimiterOffset; if (segmentHasTrailingSlash) i++; } } /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. 
*/ private void push(String input, int pos, int limit, boolean addTrailingSlash, boolean alreadyEncoded) { String segment = canonicalize( input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true, null); if (isDot(segment)) { return; // Skip '.' path segments. } if (isDotDot(segment)) { pop(); return; } if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) { encodedPathSegments.set(encodedPathSegments.size() - 1, segment); } else { encodedPathSegments.add(segment); } if (addTrailingSlash) { encodedPathSegments.add(""); } } private boolean isDot(String input) { return input.equals(".") || input.equalsIgnoreCase("%2e"); } private boolean isDotDot(String input) { return input.equals("..") || input.equalsIgnoreCase("%2e.") || input.equalsIgnoreCase(".%2e") || input.equalsIgnoreCase("%2e%2e"); } /** * Removes a path segment. When this method returns the last segment is always "", which means * the encoded path will have a trailing '/'. * *

     * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from ["a",
     * "b", "c", ""] to ["a", "b", ""].
     *

Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"] * to ["a", "b", ""]. */ private void pop() { String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1); // Make sure the path ends with a '/' by either adding an empty string or clearing a segment. if (removed.isEmpty() && !encodedPathSegments.isEmpty()) { encodedPathSegments.set(encodedPathSegments.size() - 1, ""); } else { encodedPathSegments.add(""); } } /** * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if * {@code input} does not have a scheme that starts at {@code pos}. */ private static int schemeDelimiterOffset(String input, int pos, int limit) { if (limit - pos < 2) return -1; char c0 = input.charAt(pos); if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char. for (int i = pos + 1; i < limit; i++) { char c = input.charAt(i); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.') { continue; // Scheme character. Keep going. } else if (c == ':') { return i; // Scheme prefix! } else { return -1; // Non-scheme character before the first ':'. } } return -1; // No ':'; doesn't start with a scheme. } /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */ private static int slashCount(String input, int pos, int limit) { int slashCount = 0; while (pos < limit) { char c = input.charAt(pos); if (c == '\\' || c == '/') { slashCount++; pos++; } else { break; } } return slashCount; } /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */ private static int portColonOffset(String input, int pos, int limit) { for (int i = pos; i < limit; i++) { switch (input.charAt(i)) { case '[': while (++i < limit) { if (input.charAt(i) == ']') break; } break; case ':': return i; } } return limit; // No colon. 
} private static String canonicalizeHost(String input, int pos, int limit) { // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've // checked for IPv6 square braces. But Chrome does it first, and that's more lenient. String percentDecoded = percentDecode(input, pos, limit, false); return Util.canonicalizeHost(percentDecoded); } private static int parsePort(String input, int pos, int limit) { try { // Canonicalize the port string to skip '\n' etc. String portString = canonicalize(input, pos, limit, "", false, false, false, true, null); int i = Integer.parseInt(portString); if (i > 0 && i <= 65535) return i; return -1; } catch (NumberFormatException e) { return -1; // Invalid port. } } } static String percentDecode(String encoded, boolean plusIsSpace) { return percentDecode(encoded, 0, encoded.length(), plusIsSpace); } private List percentDecode(List list, boolean plusIsSpace) { int size = list.size(); List result = new ArrayList<>(size); for (int i = 0; i < size; i++) { String s = list.get(i); result.add(s != null ? percentDecode(s, plusIsSpace) : null); } return Collections.unmodifiableList(result); } static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) { for (int i = pos; i < limit; i++) { char c = encoded.charAt(i); if (c == '%' || (c == '+' && plusIsSpace)) { // Slow path: the character at i requires decoding! Buffer out = new Buffer(); out.writeUtf8(encoded, pos, i); percentDecode(out, encoded, i, limit, plusIsSpace); return out.readUtf8(); } } // Fast path: no characters in [pos..limit) required decoding. 
return encoded.substring(pos, limit); } static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) { int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = encoded.codePointAt(i); if (codePoint == '%' && i + 2 < limit) { int d1 = decodeHexDigit(encoded.charAt(i + 1)); int d2 = decodeHexDigit(encoded.charAt(i + 2)); if (d1 != -1 && d2 != -1) { out.writeByte((d1 << 4) + d2); i += 2; continue; } } else if (codePoint == '+' && plusIsSpace) { out.writeByte(' '); continue; } out.writeUtf8CodePoint(codePoint); } } static boolean percentEncoded(String encoded, int pos, int limit) { return pos + 2 < limit && encoded.charAt(pos) == '%' && decodeHexDigit(encoded.charAt(pos + 1)) != -1 && decodeHexDigit(encoded.charAt(pos + 2)) != -1; } /** * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following * transformations: *

   * <ul>
   *   <li>Tabs, newlines, form feeds and carriage returns are skipped.
   *   <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
   *   <li>Characters in {@code encodeSet} are percent-encoded.
   *   <li>Control characters and non-ASCII characters are percent-encoded.
   *   <li>All other characters are copied without transformation.
   * </ul>
* * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. * @param strict true to encode '%' if it is not the prefix of a valid percent encoding. * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded. * @param asciiOnly true to encode all non-ASCII codepoints. * @param charset which charset to use, null equals UTF-8. */ static String canonicalize(String input, int pos, int limit, String encodeSet, boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly, Charset charset) { int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (codePoint < 0x20 || codePoint == 0x7f || codePoint >= 0x80 && asciiOnly || encodeSet.indexOf(codePoint) != -1 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit)) || codePoint == '+' && plusIsSpace) { // Slow path: the character at i requires encoding! Buffer out = new Buffer(); out.writeUtf8(input, pos, i); canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly, charset); return out.readUtf8(); } } // Fast path: no characters in [pos..limit) required encoding. return input.substring(pos, limit); } static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet, boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly, Charset charset) { Buffer encodedCharBuffer = null; // Lazily allocated. int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (alreadyEncoded && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) { // Skip this character. } else if (codePoint == '+' && plusIsSpace) { // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'. out.writeUtf8(alreadyEncoded ? 
"+" : "%2B"); } else if (codePoint < 0x20 || codePoint == 0x7f || codePoint >= 0x80 && asciiOnly || encodeSet.indexOf(codePoint) != -1 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) { // Percent encode this character. if (encodedCharBuffer == null) { encodedCharBuffer = new Buffer(); } if (charset == null || charset.equals(Util.UTF_8)) { encodedCharBuffer.writeUtf8CodePoint(codePoint); } else { encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset); } while (!encodedCharBuffer.exhausted()) { int b = encodedCharBuffer.readByte() & 0xff; out.writeByte('%'); out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]); out.writeByte(HEX_DIGITS[b & 0xf]); } } else { // This character doesn't need encoding. Just copy it over. out.writeUtf8CodePoint(codePoint); } } } static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly, Charset charset) { return canonicalize( input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly, charset); } static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { return canonicalize( input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly, null); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy