All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jvmMain.okhttp3.HttpUrl.kt Maven / Gradle / Ivy

There is a newer version: 5.0.0-alpha.14
Show newest version
/*
 * Copyright (C) 2015 Square, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package okhttp3

import java.net.InetAddress
import java.net.MalformedURLException
import java.net.URI
import java.net.URISyntaxException
import java.net.URL
import java.nio.charset.Charset
import java.util.Collections
import kotlin.text.Charsets.UTF_8
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import okhttp3.internal.canParseAsIpAddress
import okhttp3.internal.delimiterOffset
import okhttp3.internal.indexOfFirstNonAsciiWhitespace
import okhttp3.internal.indexOfLastNonAsciiWhitespace
import okhttp3.internal.parseHexDigit
import okhttp3.internal.publicsuffix.PublicSuffixDatabase
import okhttp3.internal.toCanonicalHost
import okio.Buffer

/**
 * A uniform resource locator (URL) with a scheme of either `http` or `https`. Use this class to
 * compose and decompose Internet addresses. For example, this code will compose and print a URL for
 * Google search:
 *
 * ```java
 * HttpUrl url = new HttpUrl.Builder()
 *     .scheme("https")
 *     .host("www.google.com")
 *     .addPathSegment("search")
 *     .addQueryParameter("q", "polar bears")
 *     .build();
 * System.out.println(url);
 * ```
 *
 * which prints:
 *
 * ```
 * https://www.google.com/search?q=polar%20bears
 * ```
 *
 * As another example, this code prints the human-readable query parameters of a Twitter search:
 *
 * ```java
 * HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");
 * for (int i = 0, size = url.querySize(); i < size; i++) {
 *   System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));
 * }
 * ```
 *
 * which prints:
 *
 * ```
 * q: cute #puppies
 * f: images
 * ```
 *
 * In addition to composing URLs from their component parts and decomposing URLs into their
 * component parts, this class implements relative URL resolution: what address you'd reach by
 * clicking a relative link on a specified page. For example:
 *
 * ```java
 * HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");
 * HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");
 * System.out.println(link);
 * ```
 *
 * which prints:
 *
 * ```
 * https://www.youtube.com/watch?v=cbP2N1BQdYc
 * ```
 *
 * ## What's in a URL?
 *
 * A URL has several components.
 *
 * ### Scheme
 *
 * Sometimes referred to as *protocol*, A URL's scheme describes what mechanism should be used to
 * retrieve the resource. Although URLs have many schemes (`mailto`, `file`, `ftp`), this class only
 * supports `http` and `https`. Use [java.net.URI][URI] for URLs with arbitrary schemes.
 *
 * ### Username and Password
 *
 * Username and password are either present, or the empty string `""` if absent. This class offers
 * no mechanism to differentiate empty from absent. Neither of these components are popular in
 * practice. Typically HTTP applications use other mechanisms for user identification and
 * authentication.
 *
 * ### Host
 *
 * The host identifies the webserver that serves the URL's resource. It is either a hostname like
 * `square.com` or `localhost`, an IPv4 address like `192.168.0.1`, or an IPv6 address like `::1`.
 *
 * Usually a webserver is reachable with multiple identifiers: its IP addresses, registered
 * domain names, and even `localhost` when connecting from the server itself. Each of a web server's
 * names is a distinct URL and they are not interchangeable. For example, even if
 * `http://square.github.io/dagger` and `http://google.github.io/dagger` are served by the same IP
 * address, the two URLs identify different resources.
 *
 * ### Port
 *
 * The port used to connect to the web server. By default this is 80 for HTTP and 443 for HTTPS.
 * This class never returns -1 for the port: if no port is explicitly specified in the URL then the
 * scheme's default is used.
 *
 * ### Path
 *
 * The path identifies a specific resource on the host. Paths have a hierarchical structure like
 * "/square/okhttp/issues/1486" and decompose into a list of segments like `["square", "okhttp",
 * "issues", "1486"]`.
 *
 * This class offers methods to compose and decompose paths by segment. It composes each path
 * from a list of segments by alternating between "/" and the encoded segment. For example the
 * segments `["a", "b"]` build "/a/b" and the segments `["a", "b", ""]` build "/a/b/".
 *
 * If a path's last segment is the empty string then the path ends with "/". This class always
 * builds non-empty paths: if the path is omitted it defaults to "/". The default path's segment
 * list is a single empty string: `[""]`.
 *
 * ### Query
 *
 * The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query string
 * is subdivided into a collection of name-value parameters. This class offers methods to set the
 * query as the single string, or as individual name-value parameters. With name-value parameters
 * the values are optional and names may be repeated.
 *
 * ### Fragment
 *
 * The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and
 * query the fragment is not sent to the webserver: it's private to the client.
 *
 * ## Encoding
 *
 * Each component must be encoded before it is embedded in the complete URL. As we saw above, the
 * string `cute #puppies` is encoded as `cute%20%23puppies` when used as a query parameter value.
 *
 * ### Percent encoding
 *
 * Percent encoding replaces a character (like `\ud83c\udf69`) with its UTF-8 hex bytes (like
 * `%F0%9F%8D%A9`). This approach works for whitespace characters, control characters, non-ASCII
 * characters, and characters that already have another meaning in a particular context.
 *
 * Percent encoding is used in every URL component except for the hostname. But the set of
 * characters that need to be encoded is different for each component. For example, the path
 * component must escape all of its `?` characters, otherwise it could be interpreted as the
 * start of the URL's query. But within the query and fragment components, the `?` character
 * doesn't delimit anything and doesn't need to be escaped.
 *
 * ```java
 * HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()
 *     .addPathSegment("_Who?_")
 *     .query("_Who?_")
 *     .fragment("_Who?_")
 *     .build();
 * System.out.println(url);
 * ```
 *
 * This prints:
 *
 * ```
 * http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_
 * ```
 *
 * When parsing URLs that lack percent encoding where it is required, this class will percent encode
 * the offending characters.
 *
 * ### IDNA Mapping and Punycode encoding
 *
 * Hostnames have different requirements and use a different encoding scheme. It consists of IDNA
 * mapping and Punycode encoding.
 *
 * In order to avoid confusion and discourage phishing attacks, [IDNA Mapping][idna] transforms
 * names to avoid confusing characters. This includes basic case folding: transforming shouting
 * `SQUARE.COM` into cool and casual `square.com`. It also handles more exotic characters. For
 * example, the Unicode trademark sign (™) could be confused for the letters "TM" in
 * `http://ho™ail.com`. To mitigate this, the single character (™) maps to the string (tm). There
 * is similar policy for all of the 1.1 million Unicode code points. Note that some code points such
 * as "\ud83c\udf69" are not mapped and cannot be used in a hostname.
 *
 * [Punycode](http://ietf.org/rfc/rfc3492.txt) converts a Unicode string to an ASCII string to make
 * international domain names work everywhere. For example, "σ" encodes as "xn--4xa". The encoded
 * string is not human readable, but can be used with classes like [InetAddress] to establish
 * connections.
 *
 * ## Why another URL model?
 *
 * Java includes both [java.net.URL][URL] and [java.net.URI][URI]. We offer a new URL
 * model to address problems that the others don't.
 *
 * ### Different URLs should be different
 *
 * Although they have different content, `java.net.URL` considers the following two URLs
 * equal, and the [equals()][Object.equals] method between them returns true:
 *
 *  * https://example.net/
 *
 *  * https://example.com/
 *
 * This is because those two hosts share the same IP address. This is an old, bad design decision
 * that makes `java.net.URL` unusable for many things. It shouldn't be used as a [Map] key or in a
 * [Set]. Doing so is both inefficient because equality may require a DNS lookup, and incorrect
 * because unequal URLs may be equal because of how they are hosted.
 *
 * ### Equal URLs should be equal
 *
 * These two URLs are semantically identical, but `java.net.URI` disagrees:
 *
 *  * http://host:80/
 *
 *  * http://host
 *
 * Both the unnecessary port specification (`:80`) and the absent trailing slash (`/`) cause URI to
 * bucket the two URLs separately. This harms URI's usefulness in collections. Any application that
 * stores information-per-URL will need to either canonicalize manually, or suffer unnecessary
 * redundancy for such URLs.
 *
 * Because they don't attempt canonical form, these classes are surprisingly difficult to use
 * securely. Suppose you're building a webservice that checks that incoming paths are prefixed
 * "/static/images/" before serving the corresponding assets from the filesystem.
 *
 * ```java
 * String attack = "http://example.com/static/images/../../../../../etc/passwd";
 * System.out.println(new URL(attack).getPath());
 * System.out.println(new URI(attack).getPath());
 * System.out.println(HttpUrl.parse(attack).encodedPath());
 * ```
 *
 * By canonicalizing the input paths, they are complicit in directory traversal attacks. Code that
 * checks only the path prefix may suffer!
 *
 * ```
 * /static/images/../../../../../etc/passwd
 * /static/images/../../../../../etc/passwd
 * /etc/passwd
 * ```
 *
 * ### If it works on the web, it should work in your application
 *
 * The `java.net.URI` class is strict around what URLs it accepts. It rejects URLs like
 * `http://example.com/abc|def` because the `|` character is unsupported. This class is more
 * forgiving: it will automatically percent-encode the `|'` yielding `http://example.com/abc%7Cdef`.
 * This kind behavior is consistent with web browsers. `HttpUrl` prefers consistency with major web
 * browsers over consistency with obsolete specifications.
 *
 * ### Paths and Queries should decompose
 *
 * Neither of the built-in URL models offer direct access to path segments or query parameters.
 * Manually using `StringBuilder` to assemble these components is cumbersome: do '+' characters get
 * silently replaced with spaces? If a query parameter contains a '&', does that get escaped?
 * By offering methods to read and write individual query parameters directly, application
 * developers are saved from the hassles of encoding and decoding.
 *
 * ### Plus a modern API
 *
 * The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping
 * constructors. For example, there's no API to compose a URI with a custom port without also
 * providing a query and fragment.
 *
 * Instances of [HttpUrl] are well-formed and always have a scheme, host, and path. With
 * `java.net.URL` it's possible to create an awkward URL like `http:/` with scheme and path but no
 * hostname. Building APIs that consume such malformed values is difficult!
 *
 * This class has a modern API. It avoids punitive checked exceptions: [toHttpUrl] throws
 * [IllegalArgumentException] on invalid input or [toHttpUrlOrNull] returns null if the input is an
 * invalid URL. You can even be explicit about whether each component has been encoded already.
 *
 * [idna]: http://www.unicode.org/reports/tr46/#ToASCII
 */
class HttpUrl internal constructor(
  /** Either "http" or "https". */
  @get:JvmName("scheme") val scheme: String,

  /**
   * The decoded username, or an empty string if none is present.
   *
   * | URL                              | `username()` |
   * | :------------------------------- | :----------- |
   * | `http://host/`                   | `""`         |
   * | `http://username@host/`          | `"username"` |
   * | `http://username:password@host/` | `"username"` |
   * | `http://a%20b:c%20d@host/`       | `"a b"`      |
   */
  @get:JvmName("username") val username: String,

  /**
   * Returns the decoded password, or an empty string if none is present.
   *
   * | URL                              | `password()` |
   * | :------------------------------- | :----------- |
   * | `http://host/`                   | `""`         |
   * | `http://username@host/`          | `""`         |
   * | `http://username:password@host/` | `"password"` |
   * | `http://a%20b:c%20d@host/`       | `"c d"`      |
   */
  @get:JvmName("password") val password: String,

  /**
   * The host address suitable for use with [InetAddress.getAllByName]. May be:
   *
   *  * A regular host name, like `android.com`.
   *
   *  * An IPv4 address, like `127.0.0.1`.
   *
   *  * An IPv6 address, like `::1`. Note that there are no square braces.
   *
   *  * An encoded IDN, like `xn--n3h.net`.
   *
   * | URL                   | `host()`        |
   * | :-------------------- | :-------------- |
   * | `http://android.com/` | `"android.com"` |
   * | `http://127.0.0.1/`   | `"127.0.0.1"`   |
   * | `http://[::1]/`       | `"::1"`         |
   * | `http://xn--n3h.net/` | `"xn--n3h.net"` |
   */
  @get:JvmName("host") val host: String,

  /**
   * The explicitly-specified port if one was provided, or the default port for this URL's scheme.
   * For example, this returns 8443 for `https://square.com:8443/` and 443 for
   * `https://square.com/`. The result is in `[1..65535]`.
   *
   * | URL                 | `port()` |
   * | :------------------ | :------- |
   * | `http://host/`      | `80`     |
   * | `http://host:8000/` | `8000`   |
   * | `https://host/`     | `443`    |
   */
  @get:JvmName("port") val port: Int,

  /**
   * A list of path segments like `["a", "b", "c"]` for the URL `http://host/a/b/c`. This list is
   * never empty though it may contain a single empty string.
   *
   * | URL                      | `pathSegments()`    |
   * | :----------------------- | :------------------ |
   * | `http://host/`           | `[""]`              |
   * | `http://host/a/b/c"`     | `["a", "b", "c"]`   |
   * | `http://host/a/b%20c/d"` | `["a", "b c", "d"]` |
   */
  @get:JvmName("pathSegments") val pathSegments: List,

  /**
   * Alternating, decoded query names and values, or null for no query. Names may be empty or
   * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
   * empty, or non-empty.
   */
  private val queryNamesAndValues: List?,

  /**
   * This URL's fragment, like `"abc"` for `http://host/#abc`. This is null if the URL has no
   * fragment.
   *
   * | URL                    | `fragment()` |
   * | :--------------------- | :----------- |
   * | `http://host/`         | null         |
   * | `http://host/#`        | `""`         |
   * | `http://host/#abc`     | `"abc"`      |
   * | `http://host/#abc|def` | `"abc|def"`  |
   */
  @get:JvmName("fragment") val fragment: String?,

  /** Canonical URL. */
  private val url: String
) {
  val isHttps: Boolean = scheme == "https"

  /** Returns this URL as a [java.net.URL][URL]. */
  @JvmName("url") fun toUrl(): URL {
    try {
      return URL(url)
    } catch (e: MalformedURLException) {
      throw RuntimeException(e) // Unexpected!
    }
  }

  /**
   * Returns this URL as a [java.net.URI][URI]. Because `URI` is more strict than this class, the
   * returned URI may be semantically different from this URL:
   *
   *  * Characters forbidden by URI like `[` and `|` will be escaped.
   *
   *  * Invalid percent-encoded sequences like `%xx` will be encoded like `%25xx`.
   *
   *  * Whitespace and control characters in the fragment will be stripped.
   *
   * These differences may have a significant consequence when the URI is interpreted by a
   * web server. For this reason the [URI class][URI] and this method should be avoided.
   */
  @JvmName("uri") fun toUri(): URI {
    val uri = newBuilder().reencodeForUri().toString()
    return try {
      URI(uri)
    } catch (e: URISyntaxException) {
      // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.
      try {
        val stripped = uri.replace(Regex("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]"), "")
        URI.create(stripped)
      } catch (e1: Exception) {
        throw RuntimeException(e) // Unexpected!
      }
    }
  }

  /**
   * The username, or an empty string if none is set.
   *
   * | URL                              | `encodedUsername()` |
   * | :------------------------------- | :------------------ |
   * | `http://host/`                   | `""`                |
   * | `http://username@host/`          | `"username"`        |
   * | `http://username:password@host/` | `"username"`        |
   * | `http://a%20b:c%20d@host/`       | `"a%20b"`           |
   */
  @get:JvmName("encodedUsername") val encodedUsername: String
    get() {
      if (username.isEmpty()) return ""
      val usernameStart = scheme.length + 3 // "://".length() == 3.
      val usernameEnd = url.delimiterOffset(":@", usernameStart, url.length)
      return url.substring(usernameStart, usernameEnd)
    }

  /**
   * The password, or an empty string if none is set.
   *
   * | URL                              | `encodedPassword()` |
   * | :--------------------------------| :------------------ |
   * | `http://host/`                   | `""`                |
   * | `http://username@host/`          | `""`                |
   * | `http://username:password@host/` | `"password"`        |
   * | `http://a%20b:c%20d@host/`       | `"c%20d"`           |
   */
  @get:JvmName("encodedPassword") val encodedPassword: String
    get() {
      if (password.isEmpty()) return ""
      val passwordStart = url.indexOf(':', scheme.length + 3) + 1
      val passwordEnd = url.indexOf('@')
      return url.substring(passwordStart, passwordEnd)
    }

  /**
   * The number of segments in this URL's path. This is also the number of slashes in this URL's
   * path, like 3 in `http://host/a/b/c`. This is always at least 1.
   *
   * | URL                  | `pathSize()` |
   * | :------------------- | :----------- |
   * | `http://host/`       | `1`          |
   * | `http://host/a/b/c`  | `3`          |
   * | `http://host/a/b/c/` | `4`          |
   */
  @get:JvmName("pathSize") val pathSize: Int get() = pathSegments.size

  /**
   * The entire path of this URL encoded for use in HTTP resource resolution. The returned path will
   * start with `"/"`.
   *
   * | URL                     | `encodedPath()` |
   * | :---------------------- | :-------------- |
   * | `http://host/`          | `"/"`           |
   * | `http://host/a/b/c`     | `"/a/b/c"`      |
   * | `http://host/a/b%20c/d` | `"/a/b%20c/d"`  |
   */
  @get:JvmName("encodedPath") val encodedPath: String
    get() {
      val pathStart = url.indexOf('/', scheme.length + 3) // "://".length() == 3.
      val pathEnd = url.delimiterOffset("?#", pathStart, url.length)
      return url.substring(pathStart, pathEnd)
    }

  /**
   * A list of encoded path segments like `["a", "b", "c"]` for the URL `http://host/a/b/c`. This
   * list is never empty though it may contain a single empty string.
   *
   * | URL                     | `encodedPathSegments()` |
   * | :---------------------- | :---------------------- |
   * | `http://host/`          | `[""]`                  |
   * | `http://host/a/b/c`     | `["a", "b", "c"]`       |
   * | `http://host/a/b%20c/d` | `["a", "b%20c", "d"]`   |
   */
  @get:JvmName("encodedPathSegments") val encodedPathSegments: List
    get() {
      val pathStart = url.indexOf('/', scheme.length + 3)
      val pathEnd = url.delimiterOffset("?#", pathStart, url.length)
      val result = mutableListOf()
      var i = pathStart
      while (i < pathEnd) {
        i++ // Skip the '/'.
        val segmentEnd = url.delimiterOffset('/', i, pathEnd)
        result.add(url.substring(i, segmentEnd))
        i = segmentEnd
      }
      return result
    }

  /**
   * The query of this URL, encoded for use in HTTP resource resolution. This string may be null
   * (for URLs with no query), empty (for URLs with an empty query) or non-empty (all other URLs).
   *
   * | URL                               | `encodedQuery()`       |
   * | :-------------------------------- | :--------------------- |
   * | `http://host/`                    | null                   |
   * | `http://host/?`                   | `""`                   |
   * | `http://host/?a=apple&k=key+lime` | `"a=apple&k=key+lime"` |
   * | `http://host/?a=apple&a=apricot`  | `"a=apple&a=apricot"`  |
   * | `http://host/?a=apple&b`          | `"a=apple&b"`          |
   */
  @get:JvmName("encodedQuery") val encodedQuery: String?
    get() {
      if (queryNamesAndValues == null) return null // No query.
      val queryStart = url.indexOf('?') + 1
      val queryEnd = url.delimiterOffset('#', queryStart, url.length)
      return url.substring(queryStart, queryEnd)
    }

  /**
   * This URL's query, like `"abc"` for `http://host/?abc`. Most callers should prefer
   * [queryParameterName] and [queryParameterValue] because these methods offer direct access to
   * individual query parameters.
   *
   * | URL                               | `query()`              |
   * | :-------------------------------- | :--------------------- |
   * | `http://host/`                    | null                   |
   * | `http://host/?`                   | `""`                   |
   * | `http://host/?a=apple&k=key+lime` | `"a=apple&k=key lime"` |
   * | `http://host/?a=apple&a=apricot`  | `"a=apple&a=apricot"`  |
   * | `http://host/?a=apple&b`          | `"a=apple&b"`          |
   */
  @get:JvmName("query") val query: String?
    get() {
      if (queryNamesAndValues == null) return null // No query.
      val result = StringBuilder()
      queryNamesAndValues.toQueryString(result)
      return result.toString()
    }

  /**
   * The number of query parameters in this URL, like 2 for `http://host/?a=apple&b=banana`. If this
   * URL has no query this is 0. Otherwise it is one more than the number of `"&"` separators in the
   * query.
   *
   * | URL                               | `querySize()` |
   * | :-------------------------------- | :------------ |
   * | `http://host/`                    | `0`           |
   * | `http://host/?`                   | `1`           |
   * | `http://host/?a=apple&k=key+lime` | `2`           |
   * | `http://host/?a=apple&a=apricot`  | `2`           |
   * | `http://host/?a=apple&b`          | `2`           |
   */
  @get:JvmName("querySize") val querySize: Int
    get() {
      return if (queryNamesAndValues != null) queryNamesAndValues.size / 2 else 0
    }

  /**
   * The first query parameter named `name` decoded using UTF-8, or null if there is no such query
   * parameter.
   *
   * | URL                               | `queryParameter("a")` |
   * | :-------------------------------- | :-------------------- |
   * | `http://host/`                    | null                  |
   * | `http://host/?`                   | null                  |
   * | `http://host/?a=apple&k=key+lime` | `"apple"`             |
   * | `http://host/?a=apple&a=apricot`  | `"apple"`             |
   * | `http://host/?a=apple&b`          | `"apple"`             |
   */
  fun queryParameter(name: String): String? {
    if (queryNamesAndValues == null) return null
    for (i in 0 until queryNamesAndValues.size step 2) {
      if (name == queryNamesAndValues[i]) {
        return queryNamesAndValues[i + 1]
      }
    }
    return null
  }

  /**
   * The distinct query parameter names in this URL, like `["a", "b"]` for
   * `http://host/?a=apple&b=banana`. If this URL has no query this is the empty set.
   *
   * | URL                               | `queryParameterNames()` |
   * | :-------------------------------- | :---------------------- |
   * | `http://host/`                    | `[]`                    |
   * | `http://host/?`                   | `[""]`                  |
   * | `http://host/?a=apple&k=key+lime` | `["a", "k"]`            |
   * | `http://host/?a=apple&a=apricot`  | `["a"]`                 |
   * | `http://host/?a=apple&b`          | `["a", "b"]`            |
   */
  @get:JvmName("queryParameterNames") val queryParameterNames: Set
    get() {
      if (queryNamesAndValues == null) return emptySet()
      val result = LinkedHashSet()
      for (i in 0 until queryNamesAndValues.size step 2) {
        result.add(queryNamesAndValues[i]!!)
      }
      return Collections.unmodifiableSet(result)
    }

  /**
   * Returns all values for the query parameter `name` ordered by their appearance in this
   * URL. For example this returns `["banana"]` for `queryParameterValue("b")` on
   * `http://host/?a=apple&b=banana`.
   *
   * | URL                               | `queryParameterValues("a")` | `queryParameterValues("b")` |
   * | :-------------------------------- | :-------------------------- | :-------------------------- |
   * | `http://host/`                    | `[]`                        | `[]`                        |
   * | `http://host/?`                   | `[]`                        | `[]`                        |
   * | `http://host/?a=apple&k=key+lime` | `["apple"]`                 | `[]`                        |
   * | `http://host/?a=apple&a=apricot`  | `["apple", "apricot"]`      | `[]`                        |
   * | `http://host/?a=apple&b`          | `["apple"]`                 | `[null]`                    |
   */
  fun queryParameterValues(name: String): List {
    if (queryNamesAndValues == null) return emptyList()
    val result = mutableListOf()
    for (i in 0 until queryNamesAndValues.size step 2) {
      if (name == queryNamesAndValues[i]) {
        result.add(queryNamesAndValues[i + 1])
      }
    }
    return Collections.unmodifiableList(result)
  }

  /**
   * Returns the name of the query parameter at `index`. For example this returns `"a"`
   * for `queryParameterName(0)` on `http://host/?a=apple&b=banana`. This throws if
   * `index` is not less than the [query size][querySize].
   *
   * | URL                               | `queryParameterName(0)` | `queryParameterName(1)` |
   * | :-------------------------------- | :---------------------- | :---------------------- |
   * | `http://host/`                    | exception               | exception               |
   * | `http://host/?`                   | `""`                    | exception               |
   * | `http://host/?a=apple&k=key+lime` | `"a"`                   | `"k"`                   |
   * | `http://host/?a=apple&a=apricot`  | `"a"`                   | `"a"`                   |
   * | `http://host/?a=apple&b`          | `"a"`                   | `"b"`                   |
   */
  fun queryParameterName(index: Int): String {
    if (queryNamesAndValues == null) throw IndexOutOfBoundsException()
    return queryNamesAndValues[index * 2]!!
  }

  /**
   * Returns the value of the query parameter at `index`. For example this returns `"apple"` for
   * `queryParameterName(0)` on `http://host/?a=apple&b=banana`. This throws if `index` is not less
   * than the [query size][querySize].
   *
   * | URL                               | `queryParameterValue(0)` | `queryParameterValue(1)` |
   * | :-------------------------------- | :----------------------- | :----------------------- |
   * | `http://host/`                    | exception                | exception                |
   * | `http://host/?`                   | null                     | exception                |
   * | `http://host/?a=apple&k=key+lime` | `"apple"`                | `"key lime"`             |
   * | `http://host/?a=apple&a=apricot`  | `"apple"`                | `"apricot"`              |
   * | `http://host/?a=apple&b`          | `"apple"`                | null                     |
   */
  fun queryParameterValue(index: Int): String? {
    if (queryNamesAndValues == null) throw IndexOutOfBoundsException()
    return queryNamesAndValues[index * 2 + 1]
  }

  /**
   * This URL's encoded fragment, like `"abc"` for `http://host/#abc`. This is null if the URL has
   * no fragment.
   *
   * | URL                    | `encodedFragment()` |
   * | :--------------------- | :------------------ |
   * | `http://host/`         | null                |
   * | `http://host/#`        | `""`                |
   * | `http://host/#abc`     | `"abc"`             |
   * | `http://host/#abc|def` | `"abc|def"`         |
   */
  @get:JvmName("encodedFragment") val encodedFragment: String?
    get() {
    if (fragment == null) return null
    val fragmentStart = url.indexOf('#') + 1
    return url.substring(fragmentStart)
  }

  /**
   * Returns a string with containing this URL with its username, password, query, and fragment
   * stripped, and its path replaced with `/...`. For example, redacting
   * `http://username:[email protected]/path` returns `http://example.com/...`.
   */
  fun redact(): String {
    return newBuilder("/...")!!
        .username("")
        .password("")
        .build()
        .toString()
  }

  /**
   * Returns the URL that would be retrieved by following `link` from this URL, or null if the
   * resulting URL is not well-formed.
   */
  fun resolve(link: String): HttpUrl? = newBuilder(link)?.build()

  /**
   * Returns a builder based on this URL.
   */
  fun newBuilder(): Builder {
    val result = Builder()
    result.scheme = scheme
    result.encodedUsername = encodedUsername
    result.encodedPassword = encodedPassword
    result.host = host
    // If we're set to a default port, unset it in case of a scheme change.
    result.port = if (port != defaultPort(scheme)) port else -1
    result.encodedPathSegments.clear()
    result.encodedPathSegments.addAll(encodedPathSegments)
    result.encodedQuery(encodedQuery)
    result.encodedFragment = encodedFragment
    return result
  }

  /**
   * Returns a builder for the URL that would be retrieved by following `link` from this URL,
   * or null if the resulting URL is not well-formed.
   */
  fun newBuilder(link: String): Builder? {
    return try {
      Builder().parse(this, link)
    } catch (_: IllegalArgumentException) {
      null
    }
  }

  override fun equals(other: Any?): Boolean {
    return other is HttpUrl && other.url == url
  }

  override fun hashCode(): Int = url.hashCode()

  override fun toString(): String = url

  /**
   * Returns the domain name of this URL's [host] that is one level beneath the public suffix by
   * consulting the [public suffix list](https://publicsuffix.org). Returns null if this URL's
   * [host] is an IP address or is considered a public suffix by the public suffix list.
   *
   * In general this method **should not** be used to test whether a domain is valid or routable.
   * Instead, DNS is the recommended source for that information.
   *
   * | URL                           | `topPrivateDomain()` |
   * | :---------------------------- | :------------------- |
   * | `http://google.com`           | `"google.com"`       |
   * | `http://adwords.google.co.uk` | `"google.co.uk"`     |
   * | `http://square`               | null                 |
   * | `http://co.uk`                | null                 |
   * | `http://localhost`            | null                 |
   * | `http://127.0.0.1`            | null                 |
   */
  fun topPrivateDomain(): String? {
    return if (host.canParseAsIpAddress()) {
      null
    } else {
      PublicSuffixDatabase.get().getEffectiveTldPlusOne(host)
    }
  }

  @JvmName("-deprecated_url")
  @Deprecated(
      message = "moved to toUrl()",
      replaceWith = ReplaceWith(expression = "toUrl()"),
      level = DeprecationLevel.ERROR)
  fun url(): URL = toUrl()

  @JvmName("-deprecated_uri")
  @Deprecated(
      message = "moved to toUri()",
      replaceWith = ReplaceWith(expression = "toUri()"),
      level = DeprecationLevel.ERROR)
  fun uri(): URI = toUri()

  @JvmName("-deprecated_scheme")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "scheme"),
      level = DeprecationLevel.ERROR)
  fun scheme(): String = scheme

  @JvmName("-deprecated_encodedUsername")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "encodedUsername"),
      level = DeprecationLevel.ERROR)
  fun encodedUsername(): String = encodedUsername

  @JvmName("-deprecated_username")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "username"),
      level = DeprecationLevel.ERROR)
  fun username(): String = username

  @JvmName("-deprecated_encodedPassword")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "encodedPassword"),
      level = DeprecationLevel.ERROR)
  fun encodedPassword(): String = encodedPassword

  @JvmName("-deprecated_password")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "password"),
      level = DeprecationLevel.ERROR)
  fun password(): String = password

  @JvmName("-deprecated_host")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "host"),
      level = DeprecationLevel.ERROR)
  fun host(): String = host

  @JvmName("-deprecated_port")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "port"),
      level = DeprecationLevel.ERROR)
  fun port(): Int = port

  @JvmName("-deprecated_pathSize")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "pathSize"),
      level = DeprecationLevel.ERROR)
  fun pathSize(): Int = pathSize

  @JvmName("-deprecated_encodedPath")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "encodedPath"),
      level = DeprecationLevel.ERROR)
  fun encodedPath(): String = encodedPath

  @JvmName("-deprecated_encodedPathSegments")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "encodedPathSegments"),
      level = DeprecationLevel.ERROR)
  fun encodedPathSegments(): List = encodedPathSegments

  @JvmName("-deprecated_pathSegments")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "pathSegments"),
      level = DeprecationLevel.ERROR)
  fun pathSegments(): List = pathSegments

  @JvmName("-deprecated_encodedQuery")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "encodedQuery"),
      level = DeprecationLevel.ERROR)
  fun encodedQuery(): String? = encodedQuery

  @JvmName("-deprecated_query")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "query"),
      level = DeprecationLevel.ERROR)
  fun query(): String? = query

  @JvmName("-deprecated_querySize")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "querySize"),
      level = DeprecationLevel.ERROR)
  fun querySize(): Int = querySize

  @JvmName("-deprecated_queryParameterNames")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "queryParameterNames"),
      level = DeprecationLevel.ERROR)
  fun queryParameterNames(): Set = queryParameterNames

  @JvmName("-deprecated_encodedFragment")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "encodedFragment"),
      level = DeprecationLevel.ERROR)
  fun encodedFragment(): String? = encodedFragment

  @JvmName("-deprecated_fragment")
  @Deprecated(
      message = "moved to val",
      replaceWith = ReplaceWith(expression = "fragment"),
      level = DeprecationLevel.ERROR)
  fun fragment(): String? = fragment

  class Builder {
    internal var scheme: String? = null
    internal var encodedUsername = ""
    internal var encodedPassword = ""
    internal var host: String? = null
    internal var port = -1
    internal val encodedPathSegments = mutableListOf()
    internal var encodedQueryNamesAndValues: MutableList? = null
    internal var encodedFragment: String? = null

    init {
      encodedPathSegments.add("") // The default path is '/' which needs a trailing space.
    }

    /**
     * @param scheme either "http" or "https".
     */
    fun scheme(scheme: String) = apply {
      when {
        scheme.equals("http", ignoreCase = true) -> this.scheme = "http"
        scheme.equals("https", ignoreCase = true) -> this.scheme = "https"
        else -> throw IllegalArgumentException("unexpected scheme: $scheme")
      }
    }

    fun username(username: String) = apply {
      this.encodedUsername = username.canonicalize(encodeSet = USERNAME_ENCODE_SET)
    }

    fun encodedUsername(encodedUsername: String) = apply {
      this.encodedUsername = encodedUsername.canonicalize(
          encodeSet = USERNAME_ENCODE_SET,
          alreadyEncoded = true
      )
    }

    fun password(password: String) = apply {
      this.encodedPassword = password.canonicalize(encodeSet = PASSWORD_ENCODE_SET)
    }

    fun encodedPassword(encodedPassword: String) = apply {
      this.encodedPassword = encodedPassword.canonicalize(
          encodeSet = PASSWORD_ENCODE_SET,
          alreadyEncoded = true
      )
    }

    /**
     * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6
     * address.
     */
    fun host(host: String) = apply {
      val encoded = host.percentDecode().toCanonicalHost() ?: throw IllegalArgumentException(
          "unexpected host: $host")
      this.host = encoded
    }

    fun port(port: Int) = apply {
      require(port in 1..65535) { "unexpected port: $port" }
      this.port = port
    }

    private fun effectivePort(): Int {
      return if (port != -1) port else defaultPort(scheme!!)
    }

    fun addPathSegment(pathSegment: String) = apply {
      push(pathSegment, 0, pathSegment.length, addTrailingSlash = false, alreadyEncoded = false)
    }

    /**
     * Adds a set of path segments separated by a slash (either `\` or `/`). If `pathSegments`
     * starts with a slash, the resulting URL will have empty path segment.
     */
    fun addPathSegments(pathSegments: String): Builder = addPathSegments(pathSegments, false)

    fun addEncodedPathSegment(encodedPathSegment: String) = apply {
      push(encodedPathSegment, 0, encodedPathSegment.length, addTrailingSlash = false,
          alreadyEncoded = true)
    }

    /**
     * Adds a set of encoded path segments separated by a slash (either `\` or `/`). If
     * `encodedPathSegments` starts with a slash, the resulting URL will have empty path segment.
     */
    fun addEncodedPathSegments(encodedPathSegments: String): Builder =
        addPathSegments(encodedPathSegments, true)

    private fun addPathSegments(pathSegments: String, alreadyEncoded: Boolean) = apply {
      var offset = 0
      do {
        val segmentEnd = pathSegments.delimiterOffset("/\\", offset, pathSegments.length)
        val addTrailingSlash = segmentEnd < pathSegments.length
        push(pathSegments, offset, segmentEnd, addTrailingSlash, alreadyEncoded)
        offset = segmentEnd + 1
      } while (offset <= pathSegments.length)
    }

    fun setPathSegment(index: Int, pathSegment: String) = apply {
      val canonicalPathSegment = pathSegment.canonicalize(encodeSet = PATH_SEGMENT_ENCODE_SET)
      require(!isDot(canonicalPathSegment) && !isDotDot(canonicalPathSegment)) {
        "unexpected path segment: $pathSegment"
      }
      encodedPathSegments[index] = canonicalPathSegment
    }

    fun setEncodedPathSegment(index: Int, encodedPathSegment: String) = apply {
      val canonicalPathSegment = encodedPathSegment.canonicalize(
          encodeSet = PATH_SEGMENT_ENCODE_SET,
          alreadyEncoded = true
      )
      encodedPathSegments[index] = canonicalPathSegment
      require(!isDot(canonicalPathSegment) && !isDotDot(canonicalPathSegment)) {
        "unexpected path segment: $encodedPathSegment"
      }
    }

    fun removePathSegment(index: Int) = apply {
      encodedPathSegments.removeAt(index)
      if (encodedPathSegments.isEmpty()) {
        encodedPathSegments.add("") // Always leave at least one '/'.
      }
    }

    fun encodedPath(encodedPath: String) = apply {
      require(encodedPath.startsWith("/")) { "unexpected encodedPath: $encodedPath" }
      resolvePath(encodedPath, 0, encodedPath.length)
    }

    fun query(query: String?) = apply {
      this.encodedQueryNamesAndValues = query?.canonicalize(
          encodeSet = QUERY_ENCODE_SET,
          plusIsSpace = true
      )?.toQueryNamesAndValues()
    }

    fun encodedQuery(encodedQuery: String?) = apply {
      this.encodedQueryNamesAndValues = encodedQuery?.canonicalize(
          encodeSet = QUERY_ENCODE_SET,
          alreadyEncoded = true,
          plusIsSpace = true
      )?.toQueryNamesAndValues()
    }

    /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */
    fun addQueryParameter(name: String, value: String?) = apply {
      if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = mutableListOf()
      encodedQueryNamesAndValues!!.add(name.canonicalize(
          encodeSet = QUERY_COMPONENT_ENCODE_SET,
          plusIsSpace = true
      ))
      encodedQueryNamesAndValues!!.add(value?.canonicalize(
          encodeSet = QUERY_COMPONENT_ENCODE_SET,
          plusIsSpace = true
      ))
    }

    /** Adds the pre-encoded query parameter to this URL's query string. */
    fun addEncodedQueryParameter(encodedName: String, encodedValue: String?) = apply {
      if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = mutableListOf()
      encodedQueryNamesAndValues!!.add(encodedName.canonicalize(
          encodeSet = QUERY_COMPONENT_REENCODE_SET,
          alreadyEncoded = true,
          plusIsSpace = true
      ))
      encodedQueryNamesAndValues!!.add(encodedValue?.canonicalize(
          encodeSet = QUERY_COMPONENT_REENCODE_SET,
          alreadyEncoded = true,
          plusIsSpace = true
      ))
    }

    fun setQueryParameter(name: String, value: String?) = apply {
      removeAllQueryParameters(name)
      addQueryParameter(name, value)
    }

    fun setEncodedQueryParameter(encodedName: String, encodedValue: String?) = apply {
      removeAllEncodedQueryParameters(encodedName)
      addEncodedQueryParameter(encodedName, encodedValue)
    }

    fun removeAllQueryParameters(name: String) = apply {
      if (encodedQueryNamesAndValues == null) return this
      val nameToRemove = name.canonicalize(
          encodeSet = QUERY_COMPONENT_ENCODE_SET,
          plusIsSpace = true
      )
      removeAllCanonicalQueryParameters(nameToRemove)
    }

    fun removeAllEncodedQueryParameters(encodedName: String) = apply {
      if (encodedQueryNamesAndValues == null) return this
      removeAllCanonicalQueryParameters(encodedName.canonicalize(
          encodeSet = QUERY_COMPONENT_REENCODE_SET,
          alreadyEncoded = true,
          plusIsSpace = true
      ))
    }

    private fun removeAllCanonicalQueryParameters(canonicalName: String) {
      for (i in encodedQueryNamesAndValues!!.size - 2 downTo 0 step 2) {
        if (canonicalName == encodedQueryNamesAndValues!![i]) {
          encodedQueryNamesAndValues!!.removeAt(i + 1)
          encodedQueryNamesAndValues!!.removeAt(i)
          if (encodedQueryNamesAndValues!!.isEmpty()) {
            encodedQueryNamesAndValues = null
            return
          }
        }
      }
    }

    fun fragment(fragment: String?) = apply {
      this.encodedFragment = fragment?.canonicalize(
          encodeSet = FRAGMENT_ENCODE_SET,
          unicodeAllowed = true
      )
    }

    fun encodedFragment(encodedFragment: String?) = apply {
      this.encodedFragment = encodedFragment?.canonicalize(
          encodeSet = FRAGMENT_ENCODE_SET,
          alreadyEncoded = true,
          unicodeAllowed = true
      )
    }

    /**
     * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is
     * particularly strict for certain components.
     */
    internal fun reencodeForUri() = apply {
      host = host?.replace(Regex("[\"<>^`{|}]"), "")

      for (i in 0 until encodedPathSegments.size) {
        encodedPathSegments[i] = encodedPathSegments[i].canonicalize(
            encodeSet = PATH_SEGMENT_ENCODE_SET_URI,
            alreadyEncoded = true,
            strict = true
        )
      }

      val encodedQueryNamesAndValues = this.encodedQueryNamesAndValues
      if (encodedQueryNamesAndValues != null) {
        for (i in 0 until encodedQueryNamesAndValues.size) {
          encodedQueryNamesAndValues[i] = encodedQueryNamesAndValues[i]?.canonicalize(
              encodeSet = QUERY_COMPONENT_ENCODE_SET_URI,
              alreadyEncoded = true,
              strict = true,
              plusIsSpace = true
          )
        }
      }

      encodedFragment = encodedFragment?.canonicalize(
          encodeSet = FRAGMENT_ENCODE_SET_URI,
          alreadyEncoded = true,
          strict = true,
          unicodeAllowed = true
      )
    }

    fun build(): HttpUrl {
      @Suppress("UNCHECKED_CAST") // percentDecode returns either List or List.
      return HttpUrl(
          scheme = scheme ?: throw IllegalStateException("scheme == null"),
          username = encodedUsername.percentDecode(),
          password = encodedPassword.percentDecode(),
          host = host ?: throw IllegalStateException("host == null"),
          port = effectivePort(),
          pathSegments = encodedPathSegments.map { it.percentDecode() },
          queryNamesAndValues = encodedQueryNamesAndValues?.map { it?.percentDecode(plusIsSpace = true) },
          fragment = encodedFragment?.percentDecode(),
          url = toString()
      )
    }

    override fun toString(): String {
      return buildString {
        if (scheme != null) {
          append(scheme)
          append("://")
        } else {
          append("//")
        }

        if (encodedUsername.isNotEmpty() || encodedPassword.isNotEmpty()) {
          append(encodedUsername)
          if (encodedPassword.isNotEmpty()) {
            append(':')
            append(encodedPassword)
          }
          append('@')
        }

        if (host != null) {
          if (':' in host!!) {
            // Host is an IPv6 address.
            append('[')
            append(host)
            append(']')
          } else {
            append(host)
          }
        }

        if (port != -1 || scheme != null) {
          val effectivePort = effectivePort()
          if (scheme == null || effectivePort != defaultPort(scheme!!)) {
            append(':')
            append(effectivePort)
          }
        }

        encodedPathSegments.toPathString(this)

        if (encodedQueryNamesAndValues != null) {
          append('?')
          encodedQueryNamesAndValues!!.toQueryString(this)
        }

        if (encodedFragment != null) {
          append('#')
          append(encodedFragment)
        }
      }
    }

    internal fun parse(base: HttpUrl?, input: String): Builder {
      var pos = input.indexOfFirstNonAsciiWhitespace()
      val limit = input.indexOfLastNonAsciiWhitespace(pos)

      // Scheme.
      val schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit)
      if (schemeDelimiterOffset != -1) {
        when {
          input.startsWith("https:", ignoreCase = true, startIndex = pos) -> {
            this.scheme = "https"
            pos += "https:".length
          }
          input.startsWith("http:", ignoreCase = true, startIndex = pos) -> {
            this.scheme = "http"
            pos += "http:".length
          }
          else -> throw IllegalArgumentException("Expected URL scheme 'http' or 'https' but was '" +
              input.substring(0, schemeDelimiterOffset) + "'")
        }
      } else if (base != null) {
        this.scheme = base.scheme
      } else {
        val truncated = if (input.length > 6) input.take(6) + "..." else input
        throw IllegalArgumentException(
            "Expected URL scheme 'http' or 'https' but no scheme was found for $truncated")
      }

      // Authority.
      var hasUsername = false
      var hasPassword = false
      val slashCount = input.slashCount(pos, limit)
      if (slashCount >= 2 || base == null || base.scheme != this.scheme) {
        // Read an authority if either:
        //  * The input starts with 2 or more slashes. These follow the scheme if it exists.
        //  * The input scheme exists and is different from the base URL's scheme.
        //
        // The structure of an authority is:
        //   username:password@host:port
        //
        // Username, password and port are optional.
        //   [username[:password]@]host[:port]
        pos += slashCount
        authority@ while (true) {
          val componentDelimiterOffset = input.delimiterOffset("@/\\?#", pos, limit)
          val c = if (componentDelimiterOffset != limit) {
            input[componentDelimiterOffset].code
          } else {
            -1
          }
          when (c) {
            '@'.code -> {
              // User info precedes.
              if (!hasPassword) {
                val passwordColonOffset = input.delimiterOffset(':', pos, componentDelimiterOffset)
                val canonicalUsername = input.canonicalize(
                    pos = pos,
                    limit = passwordColonOffset,
                    encodeSet = USERNAME_ENCODE_SET,
                    alreadyEncoded = true
                )
                this.encodedUsername = if (hasUsername) {
                  this.encodedUsername + "%40" + canonicalUsername
                } else {
                  canonicalUsername
                }
                if (passwordColonOffset != componentDelimiterOffset) {
                  hasPassword = true
                  this.encodedPassword = input.canonicalize(
                      pos = passwordColonOffset + 1,
                      limit = componentDelimiterOffset,
                      encodeSet = PASSWORD_ENCODE_SET,
                      alreadyEncoded = true
                  )
                }
                hasUsername = true
              } else {
                this.encodedPassword = this.encodedPassword + "%40" + input.canonicalize(
                    pos = pos,
                    limit = componentDelimiterOffset,
                    encodeSet = PASSWORD_ENCODE_SET,
                    alreadyEncoded = true
                )
              }
              pos = componentDelimiterOffset + 1
            }

            -1, '/'.code, '\\'.code, '?'.code, '#'.code -> {
              // Host info precedes.
              val portColonOffset = portColonOffset(input, pos, componentDelimiterOffset)
              if (portColonOffset + 1 < componentDelimiterOffset) {
                host = input.percentDecode(pos = pos, limit = portColonOffset).toCanonicalHost()
                port = parsePort(input, portColonOffset + 1, componentDelimiterOffset)
                require(port != -1) {
                  "Invalid URL port: \"${input.substring(portColonOffset + 1,
                      componentDelimiterOffset)}\""
                }
              } else {
                host = input.percentDecode(pos = pos, limit = portColonOffset).toCanonicalHost()
                port = defaultPort(scheme!!)
              }
              require(host != null) {
                "$INVALID_HOST: \"${input.substring(pos, portColonOffset)}\""
              }
              pos = componentDelimiterOffset
              break@authority
            }
          }
        }
      } else {
        // This is a relative link. Copy over all authority components. Also maybe the path & query.
        this.encodedUsername = base.encodedUsername
        this.encodedPassword = base.encodedPassword
        this.host = base.host
        this.port = base.port
        this.encodedPathSegments.clear()
        this.encodedPathSegments.addAll(base.encodedPathSegments)
        if (pos == limit || input[pos] == '#') {
          encodedQuery(base.encodedQuery)
        }
      }

      // Resolve the relative path.
      val pathDelimiterOffset = input.delimiterOffset("?#", pos, limit)
      resolvePath(input, pos, pathDelimiterOffset)
      pos = pathDelimiterOffset

      // Query.
      if (pos < limit && input[pos] == '?') {
        val queryDelimiterOffset = input.delimiterOffset('#', pos, limit)
        this.encodedQueryNamesAndValues = input.canonicalize(
            pos = pos + 1,
            limit = queryDelimiterOffset,
            encodeSet = QUERY_ENCODE_SET,
            alreadyEncoded = true,
            plusIsSpace = true
        ).toQueryNamesAndValues()
        pos = queryDelimiterOffset
      }

      // Fragment.
      if (pos < limit && input[pos] == '#') {
        this.encodedFragment = input.canonicalize(
            pos = pos + 1,
            limit = limit,
            encodeSet = FRAGMENT_ENCODE_SET,
            alreadyEncoded = true,
            unicodeAllowed = true
        )
      }

      return this
    }

    private fun resolvePath(input: String, startPos: Int, limit: Int) {
      var pos = startPos
      // Read a delimiter.
      if (pos == limit) {
        // Empty path: keep the base path as-is.
        return
      }
      val c = input[pos]
      if (c == '/' || c == '\\') {
        // Absolute path: reset to the default "/".
        encodedPathSegments.clear()
        encodedPathSegments.add("")
        pos++
      } else {
        // Relative path: clear everything after the last '/'.
        encodedPathSegments[encodedPathSegments.size - 1] = ""
      }

      // Read path segments.
      var i = pos
      while (i < limit) {
        val pathSegmentDelimiterOffset = input.delimiterOffset("/\\", i, limit)
        val segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit
        push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true)
        i = pathSegmentDelimiterOffset
        if (segmentHasTrailingSlash) i++
      }
    }

    /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */
    private fun push(
      input: String,
      pos: Int,
      limit: Int,
      addTrailingSlash: Boolean,
      alreadyEncoded: Boolean
    ) {
      val segment = input.canonicalize(
          pos = pos,
          limit = limit,
          encodeSet = PATH_SEGMENT_ENCODE_SET,
          alreadyEncoded = alreadyEncoded
      )
      if (isDot(segment)) {
        return // Skip '.' path segments.
      }
      if (isDotDot(segment)) {
        pop()
        return
      }
      if (encodedPathSegments[encodedPathSegments.size - 1].isEmpty()) {
        encodedPathSegments[encodedPathSegments.size - 1] = segment
      } else {
        encodedPathSegments.add(segment)
      }
      if (addTrailingSlash) {
        encodedPathSegments.add("")
      }
    }

    private fun isDot(input: String): Boolean {
      return input == "." || input.equals("%2e", ignoreCase = true)
    }

    private fun isDotDot(input: String): Boolean {
      return input == ".." ||
          input.equals("%2e.", ignoreCase = true) ||
          input.equals(".%2e", ignoreCase = true) ||
          input.equals("%2e%2e", ignoreCase = true)
    }

    /**
     * Removes a path segment. When this method returns the last segment is always "", which means
     * the encoded path will have a trailing '/'.
     *
     * Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from ["a",
     * "b", "c", ""] to ["a", "b", ""].
     *
     * Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"]
     * to ["a", "b", ""].
     */
    private fun pop() {
      val removed = encodedPathSegments.removeAt(encodedPathSegments.size - 1)

      // Make sure the path ends with a '/' by either adding an empty string or clearing a segment.
      if (removed.isEmpty() && encodedPathSegments.isNotEmpty()) {
        encodedPathSegments[encodedPathSegments.size - 1] = ""
      } else {
        encodedPathSegments.add("")
      }
    }

    companion object {
      internal const val INVALID_HOST = "Invalid URL host"

      /**
       * Returns the index of the ':' in `input` that is after scheme characters. Returns -1 if
       * `input` does not have a scheme that starts at `pos`.
       */
      private fun schemeDelimiterOffset(input: String, pos: Int, limit: Int): Int {
        if (limit - pos < 2) return -1

        val c0 = input[pos]
        if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1 // Not a scheme start char.

        characters@ for (i in pos + 1 until limit) {
          return when (input[i]) {
            // Scheme character. Keep going.
            in 'a'..'z', in 'A'..'Z', in '0'..'9', '+', '-', '.' -> continue@characters

            // Scheme prefix!
            ':' -> i

            // Non-scheme character before the first ':'.
            else -> -1
          }
        }

        return -1 // No ':'; doesn't start with a scheme.
      }

      /** Returns the number of '/' and '\' slashes in this, starting at `pos`. */
      private fun String.slashCount(pos: Int, limit: Int): Int {
        var slashCount = 0
        for (i in pos until limit) {
          val c = this[i]
          if (c == '\\' || c == '/') {
            slashCount++
          } else {
            break
          }
        }
        return slashCount
      }

      /** Finds the first ':' in `input`, skipping characters between square braces "[...]". */
      private fun portColonOffset(input: String, pos: Int, limit: Int): Int {
        var i = pos
        while (i < limit) {
          when (input[i]) {
            '[' -> {
              while (++i < limit) {
                if (input[i] == ']') break
              }
            }
            ':' -> return i
          }
          i++
        }
        return limit // No colon.
      }

      private fun parsePort(input: String, pos: Int, limit: Int): Int {
        return try {
          // Canonicalize the port string to skip '\n' etc.
          val portString = input.canonicalize(pos = pos, limit = limit, encodeSet = "")
          val i = portString.toInt()
          if (i in 1..65535) i else -1
        } catch (_: NumberFormatException) {
          -1 // Invalid port.
        }
      }
    }
  }

  companion object {
    private val HEX_DIGITS =
        charArrayOf('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F')
    internal const val USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"
    internal const val PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"
    internal const val PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#"
    internal const val PATH_SEGMENT_ENCODE_SET_URI = "[]"
    internal const val QUERY_ENCODE_SET = " \"'<>#"
    internal const val QUERY_COMPONENT_REENCODE_SET = " \"'<>#&="
    internal const val QUERY_COMPONENT_ENCODE_SET = " !\"#$&'(),/:;<=>?@[]\\^`{|}~"
    internal const val QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}"
    internal const val FORM_ENCODE_SET = " !\"#$&'()+,/:;<=>?@[\\]^`{|}~"
    internal const val FRAGMENT_ENCODE_SET = ""
    internal const val FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}"

    /** Returns 80 if `scheme.equals("http")`, 443 if `scheme.equals("https")` and -1 otherwise. */
    @JvmStatic
    fun defaultPort(scheme: String): Int {
      return when (scheme) {
        "http" -> 80
        "https" -> 443
        else -> -1
      }
    }

    /** Returns a path string for this list of path segments. */
    internal fun List.toPathString(out: StringBuilder) {
      for (i in 0 until size) {
        out.append('/')
        out.append(this[i])
      }
    }

    /** Returns a string for this list of query names and values. */
    internal fun List.toQueryString(out: StringBuilder) {
      for (i in 0 until size step 2) {
        val name = this[i]
        val value = this[i + 1]
        if (i > 0) out.append('&')
        out.append(name)
        if (value != null) {
          out.append('=')
          out.append(value)
        }
      }
    }

    /**
     * Cuts this string up into alternating parameter names and values. This divides a query string
     * like `subject=math&easy&problem=5-2=3` into the list `["subject", "math", "easy", null,
     * "problem", "5-2=3"]`. Note that values may be null and may contain '=' characters.
     */
    internal fun String.toQueryNamesAndValues(): MutableList {
      val result = mutableListOf()
      var pos = 0
      while (pos <= length) {
        var ampersandOffset = indexOf('&', pos)
        if (ampersandOffset == -1) ampersandOffset = length

        val equalsOffset = indexOf('=', pos)
        if (equalsOffset == -1 || equalsOffset > ampersandOffset) {
          result.add(substring(pos, ampersandOffset))
          result.add(null) // No value for this name.
        } else {
          result.add(substring(pos, equalsOffset))
          result.add(substring(equalsOffset + 1, ampersandOffset))
        }
        pos = ampersandOffset + 1
      }
      return result
    }

    /**
     * Returns a new [HttpUrl] representing this.
     *
     * @throws IllegalArgumentException If this is not a well-formed HTTP or HTTPS URL.
     */
    @JvmStatic
    @JvmName("get") fun String.toHttpUrl(): HttpUrl = Builder().parse(null, this).build()

    /**
     * Returns a new `HttpUrl` representing `url` if it is a well-formed HTTP or HTTPS URL, or null
     * if it isn't.
     */
    @JvmStatic
    @JvmName("parse") fun String.toHttpUrlOrNull(): HttpUrl? {
      return try {
        toHttpUrl()
      } catch (_: IllegalArgumentException) {
        null
      }
    }

    /**
     * Returns an [HttpUrl] for this if its protocol is `http` or `https`, or null if it has any
     * other protocol.
     */
    @JvmStatic
    @JvmName("get") fun URL.toHttpUrlOrNull(): HttpUrl? = toString().toHttpUrlOrNull()

    @JvmStatic
    @JvmName("get") fun URI.toHttpUrlOrNull(): HttpUrl? = toString().toHttpUrlOrNull()

    @JvmName("-deprecated_get")
    @Deprecated(
        message = "moved to extension function",
        replaceWith = ReplaceWith(
            expression = "url.toHttpUrl()",
            imports = ["okhttp3.HttpUrl.Companion.toHttpUrl"]),
        level = DeprecationLevel.ERROR)
    fun get(url: String): HttpUrl = url.toHttpUrl()

    @JvmName("-deprecated_parse")
    @Deprecated(
        message = "moved to extension function",
        replaceWith = ReplaceWith(
            expression = "url.toHttpUrlOrNull()",
            imports = ["okhttp3.HttpUrl.Companion.toHttpUrlOrNull"]),
        level = DeprecationLevel.ERROR)
    fun parse(url: String): HttpUrl? = url.toHttpUrlOrNull()

    @JvmName("-deprecated_get")
    @Deprecated(
        message = "moved to extension function",
        replaceWith = ReplaceWith(
            expression = "url.toHttpUrlOrNull()",
            imports = ["okhttp3.HttpUrl.Companion.toHttpUrlOrNull"]),
        level = DeprecationLevel.ERROR)
    fun get(url: URL): HttpUrl? = url.toHttpUrlOrNull()

    @JvmName("-deprecated_get")
    @Deprecated(
        message = "moved to extension function",
        replaceWith = ReplaceWith(
            expression = "uri.toHttpUrlOrNull()",
            imports = ["okhttp3.HttpUrl.Companion.toHttpUrlOrNull"]),
        level = DeprecationLevel.ERROR)
    fun get(uri: URI): HttpUrl? = uri.toHttpUrlOrNull()

    internal fun String.percentDecode(
      pos: Int = 0,
      limit: Int = length,
      plusIsSpace: Boolean = false
    ): String {
      for (i in pos until limit) {
        val c = this[i]
        if (c == '%' || c == '+' && plusIsSpace) {
          // Slow path: the character at i requires decoding!
          val out = Buffer()
          out.writeUtf8(this, pos, i)
          out.writePercentDecoded(this, pos = i, limit = limit, plusIsSpace = plusIsSpace)
          return out.readUtf8()
        }
      }

      // Fast path: no characters in [pos..limit) required decoding.
      return substring(pos, limit)
    }

    private fun Buffer.writePercentDecoded(
      encoded: String,
      pos: Int,
      limit: Int,
      plusIsSpace: Boolean
    ) {
      var codePoint: Int
      var i = pos
      while (i < limit) {
        codePoint = encoded.codePointAt(i)
        if (codePoint == '%'.code && i + 2 < limit) {
          val d1 = encoded[i + 1].parseHexDigit()
          val d2 = encoded[i + 2].parseHexDigit()
          if (d1 != -1 && d2 != -1) {
            writeByte((d1 shl 4) + d2)
            i += 2
            i += Character.charCount(codePoint)
            continue
          }
        } else if (codePoint == '+'.code && plusIsSpace) {
          writeByte(' '.code)
          i++
          continue
        }
        writeUtf8CodePoint(codePoint)
        i += Character.charCount(codePoint)
      }
    }

    private fun String.isPercentEncoded(pos: Int, limit: Int): Boolean {
      return pos + 2 < limit &&
          this[pos] == '%' &&
          this[pos + 1].parseHexDigit() != -1 &&
          this[pos + 2].parseHexDigit() != -1
    }

    /**
     * Returns a substring of `input` on the range `[pos..limit)` with the following
     * transformations:
     *
     *  * Tabs, newlines, form feeds and carriage returns are skipped.
     *
     *  * In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
     *
     *  * Characters in `encodeSet` are percent-encoded.
     *
     *  * Control characters and non-ASCII characters are percent-encoded.
     *
     *  * All other characters are copied without transformation.
     *
     * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
     * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
     * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded.
     * @param unicodeAllowed true to leave non-ASCII codepoint unencoded.
     * @param charset which charset to use, null equals UTF-8.
     */
    internal fun String.canonicalize(
      pos: Int = 0,
      limit: Int = length,
      encodeSet: String,
      alreadyEncoded: Boolean = false,
      strict: Boolean = false,
      plusIsSpace: Boolean = false,
      unicodeAllowed: Boolean = false,
      charset: Charset? = null
    ): String {
      var codePoint: Int
      var i = pos
      while (i < limit) {
        codePoint = codePointAt(i)
        if (codePoint < 0x20 ||
            codePoint == 0x7f ||
            codePoint >= 0x80 && !unicodeAllowed ||
            codePoint.toChar() in encodeSet ||
            codePoint == '%'.code &&
            (!alreadyEncoded || strict && !isPercentEncoded(i, limit)) ||
            codePoint == '+'.code && plusIsSpace) {
          // Slow path: the character at i requires encoding!
          val out = Buffer()
          out.writeUtf8(this, pos, i)
          out.writeCanonicalized(
              input = this,
              pos = i,
              limit = limit,
              encodeSet = encodeSet,
              alreadyEncoded = alreadyEncoded,
              strict = strict,
              plusIsSpace = plusIsSpace,
              unicodeAllowed = unicodeAllowed,
              charset = charset
          )
          return out.readUtf8()
        }
        i += Character.charCount(codePoint)
      }

      // Fast path: no characters in [pos..limit) required encoding.
      return substring(pos, limit)
    }

    private fun Buffer.writeCanonicalized(
      input: String,
      pos: Int,
      limit: Int,
      encodeSet: String,
      alreadyEncoded: Boolean,
      strict: Boolean,
      plusIsSpace: Boolean,
      unicodeAllowed: Boolean,
      charset: Charset?
    ) {
      var encodedCharBuffer: Buffer? = null // Lazily allocated.
      var codePoint: Int
      var i = pos
      while (i < limit) {
        codePoint = input.codePointAt(i)
        if (alreadyEncoded && (codePoint == '\t'.code || codePoint == '\n'.code ||
                codePoint == '\u000c'.code || codePoint == '\r'.code)) {
          // Skip this character.
        } else if (codePoint == ' '.code && encodeSet === FORM_ENCODE_SET) {
          // Encode ' ' as '+'.
          writeUtf8("+")
        } else if (codePoint == '+'.code && plusIsSpace) {
          // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
          writeUtf8(if (alreadyEncoded) "+" else "%2B")
        } else if (codePoint < 0x20 ||
            codePoint == 0x7f ||
            codePoint >= 0x80 && !unicodeAllowed ||
            codePoint.toChar() in encodeSet ||
            codePoint == '%'.code &&
            (!alreadyEncoded || strict && !input.isPercentEncoded(i, limit))) {
          // Percent encode this character.
          if (encodedCharBuffer == null) {
            encodedCharBuffer = Buffer()
          }

          if (charset == null || charset == UTF_8) {
            encodedCharBuffer.writeUtf8CodePoint(codePoint)
          } else {
            encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset)
          }

          while (!encodedCharBuffer.exhausted()) {
            val b = encodedCharBuffer.readByte().toInt() and 0xff
            writeByte('%'.code)
            writeByte(HEX_DIGITS[b shr 4 and 0xf].code)
            writeByte(HEX_DIGITS[b and 0xf].code)
          }
        } else {
          // This character doesn't need encoding. Just copy it over.
          writeUtf8CodePoint(codePoint)
        }
        i += Character.charCount(codePoint)
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy