main.okhttp3.internal.url.-Url.kt Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of okhttp Show documentation
The newest version!
/*
 * Copyright (C) 2022 Square, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
@file:Suppress("ktlint:standard:filename")

package okhttp3.internal.url

import java.nio.charset.Charset
import okhttp3.internal.parseHexDigit
import okio.Buffer

internal val HEX_DIGITS =
  charArrayOf('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F')
internal const val USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"
internal const val PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"
internal const val PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#"
internal const val PATH_SEGMENT_ENCODE_SET_URI = "[]"
internal const val QUERY_ENCODE_SET = " \"'<>#"
internal const val QUERY_COMPONENT_REENCODE_SET = " \"'<>#&="
internal const val QUERY_COMPONENT_ENCODE_SET = " !\"#$&'(),/:;<=>?@[]\\^`{|}~"
internal const val QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}"
internal const val FORM_ENCODE_SET = " !\"#$&'()+,/:;<=>?@[\\]^`{|}~"
internal const val FRAGMENT_ENCODE_SET = ""
internal const val FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}"

internal fun Buffer.writeCanonicalized(
  input: String,
  pos: Int,
  limit: Int,
  encodeSet: String,
  alreadyEncoded: Boolean,
  strict: Boolean,
  plusIsSpace: Boolean,
  unicodeAllowed: Boolean,
  charset: Charset?,
) {
  var encodedCharBuffer: Buffer? = null // Lazily allocated.
  var codePoint: Int
  var i = pos
  while (i < limit) {
    codePoint = input.codePointAt(i)
    if (alreadyEncoded && (
        codePoint == '\t'.code || codePoint == '\n'.code ||
          codePoint == '\u000c'.code || codePoint == '\r'.code
      )
    ) {
      // Skip this character.
    } else if (codePoint == ' '.code && encodeSet === FORM_ENCODE_SET) {
      // Encode ' ' as '+'.
      writeUtf8("+")
    } else if (codePoint == '+'.code && plusIsSpace) {
      // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
      writeUtf8(if (alreadyEncoded) "+" else "%2B")
    } else if (codePoint < 0x20 ||
      codePoint == 0x7f ||
      codePoint >= 0x80 && !unicodeAllowed ||
      codePoint.toChar() in encodeSet ||
      codePoint == '%'.code &&
      (!alreadyEncoded || strict && !input.isPercentEncoded(i, limit))
    ) {
      // Percent encode this character.
      if (encodedCharBuffer == null) {
        encodedCharBuffer = Buffer()
      }

      if (charset == null || charset == Charsets.UTF_8) {
        encodedCharBuffer.writeUtf8CodePoint(codePoint)
      } else {
        encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset)
      }

      while (!encodedCharBuffer.exhausted()) {
        val b = encodedCharBuffer.readByte().toInt() and 0xff
        writeByte('%'.code)
        writeByte(HEX_DIGITS[b shr 4 and 0xf].code)
        writeByte(HEX_DIGITS[b and 0xf].code)
      }
    } else {
      // This character doesn't need encoding. Just copy it over.
      writeUtf8CodePoint(codePoint)
    }
    i += Character.charCount(codePoint)
  }
}

/**
 * Returns a substring of `input` on the range `[pos..limit)` with the following
 * transformations:
 *
 *  * Tabs, newlines, form feeds and carriage returns are skipped.
 *
 *  * In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
 *
 *  * Characters in `encodeSet` are percent-encoded.
 *
 *  * Control characters and non-ASCII characters are percent-encoded.
 *
 *  * All other characters are copied without transformation.
 *
 * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
 * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
 * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded.
 * @param unicodeAllowed true to leave non-ASCII codepoint unencoded.
 * @param charset which charset to use, null equals UTF-8.
 */
internal fun String.canonicalizeWithCharset(
  pos: Int = 0,
  limit: Int = length,
  encodeSet: String,
  alreadyEncoded: Boolean = false,
  strict: Boolean = false,
  plusIsSpace: Boolean = false,
  unicodeAllowed: Boolean = false,
  charset: Charset? = null,
): String {
  var codePoint: Int
  var i = pos
  while (i < limit) {
    codePoint = codePointAt(i)
    if (codePoint < 0x20 ||
      codePoint == 0x7f ||
      codePoint >= 0x80 && !unicodeAllowed ||
      codePoint.toChar() in encodeSet ||
      codePoint == '%'.code &&
      (!alreadyEncoded || strict && !isPercentEncoded(i, limit)) ||
      codePoint == '+'.code && plusIsSpace
    ) {
      // Slow path: the character at i requires encoding!
      val out = Buffer()
      out.writeUtf8(this, pos, i)
      out.writeCanonicalized(
        input = this,
        pos = i,
        limit = limit,
        encodeSet = encodeSet,
        alreadyEncoded = alreadyEncoded,
        strict = strict,
        plusIsSpace = plusIsSpace,
        unicodeAllowed = unicodeAllowed,
        charset = charset,
      )
      return out.readUtf8()
    }
    i += Character.charCount(codePoint)
  }

  // Fast path: no characters in [pos..limit) required encoding.
  return substring(pos, limit)
}

internal fun Buffer.writePercentDecoded(
  encoded: String,
  pos: Int,
  limit: Int,
  plusIsSpace: Boolean,
) {
  var codePoint: Int
  var i = pos
  while (i < limit) {
    codePoint = encoded.codePointAt(i)
    if (codePoint == '%'.code && i + 2 < limit) {
      val d1 = encoded[i + 1].parseHexDigit()
      val d2 = encoded[i + 2].parseHexDigit()
      if (d1 != -1 && d2 != -1) {
        writeByte((d1 shl 4) + d2)
        i += 2
        i += Character.charCount(codePoint)
        continue
      }
    } else if (codePoint == '+'.code && plusIsSpace) {
      writeByte(' '.code)
      i++
      continue
    }
    writeUtf8CodePoint(codePoint)
    i += Character.charCount(codePoint)
  }
}

internal fun String.canonicalize(
  pos: Int = 0,
  limit: Int = length,
  encodeSet: String,
  alreadyEncoded: Boolean = false,
  strict: Boolean = false,
  plusIsSpace: Boolean = false,
  unicodeAllowed: Boolean = false,
): String {
  return canonicalizeWithCharset(
    pos = pos,
    limit = limit,
    encodeSet = encodeSet,
    alreadyEncoded = alreadyEncoded,
    strict = strict,
    plusIsSpace = plusIsSpace,
    unicodeAllowed = unicodeAllowed,
  )
}

internal fun String.percentDecode(
  pos: Int = 0,
  limit: Int = length,
  plusIsSpace: Boolean = false,
): String {
  for (i in pos until limit) {
    val c = this[i]
    if (c == '%' || c == '+' && plusIsSpace) {
      // Slow path: the character at i requires decoding!
      val out = Buffer()
      out.writeUtf8(this, pos, i)
      out.writePercentDecoded(this, pos = i, limit = limit, plusIsSpace = plusIsSpace)
      return out.readUtf8()
    }
  }

  // Fast path: no characters in [pos..limit) required decoding.
  return substring(pos, limit)
}

internal fun String.isPercentEncoded(
  pos: Int,
  limit: Int,
): Boolean {
  return pos + 2 < limit &&
    this[pos] == '%' &&
    this[pos + 1].parseHexDigit() != -1 &&
    this[pos + 2].parseHexDigit() != -1
}