All downloads are free. The search and download functionality uses the official Maven repository.

main.okhttp3.internal.-HostnamesCommon.kt Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2021 Square, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
@file:Suppress("ktlint:standard:filename")

package okhttp3.internal

import okhttp3.internal.idn.IDNA_MAPPING_TABLE
import okhttp3.internal.idn.Punycode
import okio.Buffer

/**
 * Coarse pattern that separates IP-address-like strings from hostnames. It approximates Android's
 * private InetAddress#isNumeric API.
 *
 * Two shapes are accepted:
 *  * IPv6-like: hex digits containing at least one colon; dots may only appear after the first
 *    colon.
 *  * IPv4-like: one or more characters, each of which is a decimal digit or a dot.
 *
 * Strings such as "a:.23" and "54" also match even though they are neither IP addresses nor
 * hostnames; they will be verified as IP addresses (which is a more strict verification).
 */
private val VERIFY_AS_IP_ADDRESS = Regex("([0-9a-fA-F]*:[0-9a-fA-F:.]*)|([\\d.]+)")

/**
 * Returns true if this string is not a host name and might be an IP address. The whole string must
 * match the pattern; a partial match is not enough.
 */
fun String.canParseAsIpAddress(): Boolean = this matches VERIFY_AS_IP_ADDRESS

/**
 * Checks the DNS length limits of this host name.
 *
 * Returns true if the whole string is empty or longer than 253 characters, or if any dot-separated
 * label is empty or longer than 63 characters. A single trailing dot is tolerated and does not
 * count as introducing an empty label.
 */
internal fun String.containsInvalidLabelLengths(): Boolean {
  if (isEmpty() || length > 253) return true

  var start = 0
  do {
    val nextDot = indexOf('.', startIndex = start)
    // The label runs up to the next dot, or to the end of the string if there is none.
    val end = if (nextDot == -1) length else nextDot
    val size = end - start
    if (size < 1 || size > 63) return true
    // Stepping past the dot lands on `length` for a trailing dot, which ends the scan.
    start = end + 1
  } while (start < length)

  return false
}

/**
 * Returns true if any character of this string is unacceptable in a host name.
 *
 * The WHATWG Host parsing rules accept some character codes that are invalid for OkHttp's host
 * header checks (and for the WHATWG Host syntax definition). A character is rejected when either:
 *
 *  * it lies outside the range U+0020..U+007E, or
 *  * it is one of the characters mentioned in the WHATWG Host parsing spec that fall inside that
 *    range: space, "#", "%", "/", ":", "?", "@", "[", "\", and "]". (The spec's U+0000, U+0009,
 *    U+000A and U+000D are already excluded by the range check.)
 */
internal fun String.containsInvalidHostnameAsciiCodes(): Boolean =
  any { c -> c !in '\u0020'..'\u007e' || c in " #%/:?@[\\]" }

/**
 * Decodes an IPv6 address like 1111:2222:3333:4444:5555:6666:7777:8888 or ::1.
 *
 * Only the characters of [input] in the half-open range `[pos, limit)` are examined. The address
 * may contain a single "::" compression delimiter and may end with a dotted-decimal IPv4 suffix
 * (parsed by [decodeIpv4Suffix]).
 *
 * @return the 16-byte address, or null if the range is not a well-formed IPv6 address (too many
 *     or too few groups, more than one "::", a wrong delimiter, or a group that is empty or
 *     longer than four hex digits).
 */
internal fun decodeIpv6(
  input: String,
  pos: Int,
  limit: Int,
): ByteArray? {
  val address = ByteArray(16)
  var b = 0 // Next byte of `address` to write.
  var compress = -1 // Byte offset just after the "::" gap, or -1 if no "::" was seen.
  var groupOffset = -1 // Input index where the most recently read group started.

  var i = pos
  while (i < limit) {
    if (b == address.size) return null // Too many groups.

    // Read a delimiter.
    if (i + 2 <= limit && input.startsWith("::", startIndex = i)) {
      // Compression "::" delimiter, which is anywhere in the input, including its prefix.
      if (compress != -1) return null // Multiple "::" delimiters.
      i += 2
      b += 2 // "::" stands for at least one all-zero group.
      compress = b
      if (i == limit) break
      // The "::" used up the last free group but more input follows, as in "1:2:3:4:5:6:7::8".
      // Reject it here; otherwise the group below would be written past the end of `address`.
      if (b == address.size) return null // Too many groups.
    } else if (b != 0) {
      // Group separator ":" delimiter.
      if (input.startsWith(":", startIndex = i)) {
        i++
      } else if (input.startsWith(".", startIndex = i)) {
        // If we see a '.', rewind to the beginning of the previous group and parse as IPv4.
        if (!decodeIpv4Suffix(input, groupOffset, limit, address, b - 2)) return null
        b += 2 // We rewound two bytes and then added four.
        break
      } else {
        return null // Wrong delimiter.
      }
    }

    // Read a group, one to four hex digits.
    var value = 0
    groupOffset = i
    while (i < limit) {
      val hexDigit = input[i].parseHexDigit()
      if (hexDigit == -1) break
      value = (value shl 4) + hexDigit
      i++
    }
    val groupLength = i - groupOffset
    if (groupLength == 0 || groupLength > 4) return null // Group is the wrong size.

    // We've successfully read a group. Assign its value to our byte array.
    address[b++] = (value.ushr(8) and 0xff).toByte()
    address[b++] = (value and 0xff).toByte()
  }

  // All done. If compression happened, we need to move bytes to the right place in the
  // address. Here's a sample:
  //
  //      input: "1111:2222:3333::7777:8888"
  //     before: { 11, 11, 22, 22, 33, 33, 00, 00, 77, 77, 88, 88, 00, 00, 00, 00  }
  //   compress: 6
  //          b: 10
  //      after: { 11, 11, 22, 22, 33, 33, 00, 00, 00, 00, 00, 00, 77, 77, 88, 88 }
  //
  if (b != address.size) {
    if (compress == -1) return null // Address didn't have compression or enough groups.
    // Shift the groups that followed "::" to the end of the array, then zero the gap.
    address.copyInto(address, address.size - (b - compress), compress, b)
    address.fill(0.toByte(), compress, compress + (address.size - b))
  }

  return address
}

/**
 * Decodes an IPv4 address suffix of an IPv6 address, like 1111::5555:6666:192.168.0.1.
 *
 * Reads dotted-decimal octets from [input] in the half-open range `[pos, limit)` and stores them
 * into [address] starting at [addressOffset]. Octets are written as they are parsed, so [address]
 * may have been partially modified when this returns false.
 *
 * @return true if exactly four octets were read, each in 0..255 and without unnecessary leading
 *     zeros; false otherwise.
 */
internal fun decodeIpv4Suffix(
  input: String,
  pos: Int,
  limit: Int,
  address: ByteArray,
  addressOffset: Int,
): Boolean {
  var writeIndex = addressOffset
  var cursor = pos

  while (cursor < limit) {
    // No room left in the array for another octet.
    if (writeIndex == address.size) return false

    // Every octet except the first must be preceded by a '.'.
    if (writeIndex != addressOffset) {
      if (input[cursor] != '.') return false
      cursor++
    }

    // Accumulate decimal digits until a non-digit or the limit is reached.
    val octetStart = cursor
    var octet = 0
    while (cursor < limit) {
      val digit = input[cursor] - '0'
      if (digit !in 0..9) break
      // A zero value after the first digit means the octet began with a redundant '0'.
      if (octet == 0 && cursor != octetStart) return false
      octet = octet * 10 + digit
      if (octet > 255) return false
      cursor++
    }
    if (cursor == octetStart) return false // An octet needs at least one digit.

    address[writeIndex++] = octet.toByte()
  }

  // Anything other than exactly four octets is a failure.
  return writeIndex - addressOffset == 4
}

/**
 * Encodes an IPv6 address in canonical form according to RFC 5952.
 *
 * The address is treated as a sequence of 2-byte groups. Each group is written in hex and the
 * groups are separated by ':'. The longest run of all-zero groups is collapsed to "::".
 */
internal fun inet6AddressToAscii(address: ByteArray): String {
  // Pass 1: locate the longest run of zero groups, measured in bytes.
  // A run must be longer than one group (section 4.2.2), and when several runs tie the first one
  // wins (section 4.2.3), hence the strict "greater than" comparison.
  var bestStart = -1
  var bestLength = 0
  var scan = 0
  while (scan < address.size) {
    val runStart = scan
    while (scan < 16 && address[scan].toInt() == 0 && address[scan + 1].toInt() == 0) {
      scan += 2
    }
    val runLength = scan - runStart
    if (runLength >= 4 && runLength > bestLength) {
      bestStart = runStart
      bestLength = runLength
    }
    // Step over the non-zero group that ended the run (or the current group if there was no run).
    scan += 2
  }

  // Pass 2: write the groups, replacing the chosen run with "::".
  val out = Buffer()
  var offset = 0
  while (offset < address.size) {
    if (offset == bestStart) {
      out.writeByte(':'.code)
      offset += bestLength
      // A following group supplies the second ':' itself; a run that reaches the end does not.
      if (offset == 16) out.writeByte(':'.code)
      continue
    }

    if (offset > 0) out.writeByte(':'.code)
    val high = address[offset] and 0xff
    val low = address[offset + 1] and 0xff
    out.writeHexadecimalUnsignedLong(((high shl 8) or low).toLong())
    offset += 2
  }
  return out.readUtf8()
}

/**
 * Returns the canonical address for [address]. When [address] is an IPv6 address that is mapped to
 * an IPv4 address, the result is a new 4-byte array holding its last four bytes. In every other
 * case [address] itself is returned.
 *
 * https://en.wikipedia.org/wiki/IPv6#IPv4-mapped_IPv6_addresses
 */
internal fun canonicalizeInetAddress(address: ByteArray): ByteArray =
  if (isMappedIpv4Address(address)) address.copyOfRange(12, 16) else address

/**
 * Returns true for IPv6 addresses like `0000:0000:0000:0000:0000:ffff:XXXX:XXXX`: exactly 16
 * bytes, of which the first ten are zero and the next two are 0xff.
 */
private fun isMappedIpv4Address(address: ByteArray): Boolean =
  address.size == 16 &&
    (0 until 10).all { address[it] == 0.toByte() } &&
    address[10] == 255.toByte() &&
    address[11] == 255.toByte()

/**
 * Encodes an IPv4 address in canonical form according to RFC 4001: the four bytes as unsigned
 * decimal numbers separated by '.'.
 *
 * @throws IllegalArgumentException if [address] is not exactly four bytes long.
 */
internal fun inet4AddressToAscii(address: ByteArray): String {
  require(address.size == 4)
  val out = Buffer()
  for (index in address.indices) {
    if (index > 0) out.writeByte('.'.code)
    // Mask to 0..255 so bytes above 0x7f are not written as negative numbers.
    out.writeDecimalLong((address[index] and 0xff).toLong())
  }
  return out.readUtf8()
}

/**
 * Returns this host in canonical form, or null if it cannot be canonicalized.
 *
 * A string that contains ':' is treated as an IPv6 address, optionally wrapped in square brackets
 * "[...]". It is decoded and re-encoded in canonical form; an IPv4-mapped IPv6 address is returned
 * in dotted-decimal IPv4 form. Null is returned if the address cannot be decoded.
 *
 * Any other string goes through IDN ToASCII encoding, which also canonicalizes the result to
 * lowercase. For example this converts `☃.net` to `xn--n3h.net`, and `WwW.GoOgLe.cOm` to
 * `www.google.com`. Null is returned if the host cannot be ToASCII encoded, or if the result is
 * empty, contains unsupported ASCII characters, or has invalid label lengths.
 */
internal fun String.toCanonicalHost(): String? {
  val host: String = this

  if (!host.contains(":")) {
    // Not an IPv6 literal: encode as a host name and validate the ASCII result.
    val ascii = idnToAscii(host) ?: return null
    return when {
      ascii.isEmpty() -> null
      ascii.containsInvalidHostnameAsciiCodes() -> null
      ascii.containsInvalidLabelLengths() -> null
      else -> ascii
    }
  }

  // IPv6 literal. If it is encased in square brackets, decode only what lies between them.
  val bracketed = host.startsWith("[") && host.endsWith("]")
  val start = if (bracketed) 1 else 0
  val end = if (bracketed) host.length - 1 else host.length
  val decoded = decodeIpv6(host, start, end) ?: return null

  val canonical = canonicalizeInetAddress(decoded)
  return when (canonical.size) {
    16 -> inet6AddressToAscii(canonical)
    4 -> inet4AddressToAscii(canonical) // An IPv4-mapped IPv6 address.
    else -> throw AssertionError("Invalid IPv6 address: '$host'")
  }
}

/**
 * Converts [host] to its ASCII form in four steps: map each code point through
 * [IDNA_MAPPING_TABLE], NFC-normalize the mapped text, Punycode-decode it, and finally
 * Punycode-encode the decoded text.
 *
 * @return the encoded host, or null if the mapping table rejects a code point, if Punycode
 *     decoding fails, or if the decoded text is not already in NFC form.
 */
internal fun idnToAscii(host: String): String? {
  val source = Buffer().writeUtf8(host)
  val mapped = Buffer()

  // 1. Map every code point of the input into `mapped`.
  while (!source.exhausted()) {
    if (!IDNA_MAPPING_TABLE.map(source.readUtf8CodePoint(), mapped)) return null
  }

  // 2. Normalize the mapped text, writing it back into the now-empty `source`.
  source.writeUtf8(normalizeNfc(mapped.readUtf8()))

  // 3. For each label, convert/validate Punycode.
  val unicode = Punycode.decode(source.readUtf8()) ?: return null

  // 4.1 Validate: the decoded text must be NFC.
  // TODO: Must not begin with a combining mark.
  // TODO: Each character must be 'valid' or 'deviation'. Not mapped.
  // TODO: CheckJoiners from IDNA 2008
  // TODO: CheckBidi from IDNA 2008, RFC 5893, Section 2.
  return if (unicode == normalizeNfc(unicode)) Punycode.encode(unicode) else null
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy