All downloads are free. Search and download functionality uses the official Maven repository.

jvmTest.okhttp3.internal.idn.stringprepTablesReader.kt Maven / Gradle / Ivy

There is a newer version: 5.0.0-alpha.14
Show newest version
/*
 * Copyright (C) 2022 Square, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package okhttp3.internal.idn

import okio.Buffer
import okio.BufferedSource
import okio.ByteString.Companion.encodeUtf8
import okio.FileSystem
import okio.IOException
import okio.Options
import okio.Path
import okio.buffer
import okio.use

/**
 * Reads Stringprep table files through [fileSystem] and turns them into code point sets and
 * code point mappings.
 */
class StringprepTablesReader(
  private val fileSystem: FileSystem,
) {
  /**
   * Returns a [Stringprep] that uses the tables of Nameprep ([RFC 3491]).
   *
   * Every table is read from a file named `rfc3454.<table>.txt` resolved against [base].
   *
   * [RFC 3491]: https://datatracker.ietf.org/doc/html/rfc3491
   */
  fun readNameprep(base: Path): Stringprep {
    val unassigned = readCodePointSet(base / "rfc3454.A.1.txt")
    val mapping = MappingListCodePointMapping(
      // Explicit type arguments are required: they cannot be inferred through apply().
      // B.2 is added after B.1, so a key present in both files keeps the B.2 value.
      mutableMapOf<Int, String>()
        .apply {
          putAll(readCodePointMapping(base / "rfc3454.B.1.txt").mappings)
          putAll(readCodePointMapping(base / "rfc3454.B.2.txt").mappings)
        }
    )
    val prohibitSet = RangeListCodePointSet(
      // The prohibited set is the concatenation of the ranges of all listed C tables.
      ranges = mutableListOf<IntRange>()
        .apply {
          addAll(readCodePointSet(base / "rfc3454.C.1.2.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.2.2.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.3.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.4.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.5.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.6.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.7.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.8.txt").ranges)
          addAll(readCodePointSet(base / "rfc3454.C.9.txt").ranges)
        }
    )
    val randalcatSet = readCodePointSet(base / "rfc3454.D.1.txt")
    val lcatSet = readCodePointSet(base / "rfc3454.D.2.txt")
    return Stringprep(
      unassigned = unassigned,
      mapping = mapping,
      prohibitSet = prohibitSet,
      randalcatSet = randalcatSet,
      lcatSet = lcatSet
    )
  }

  /**
   * Reads a set of range lines like the following:
   *
   * ```
   *  0221
   *  0234-024F
   *  0000-001F; [CONTROL CHARACTERS]
   *  007F; DELETE
   * ```
   *
   * The file at [path] is opened through [fileSystem] and closed before this returns.
   */
  fun readCodePointSet(path: Path): RangeListCodePointSet {
    fileSystem.source(path).buffer().use { source ->
      return source.readCodePointSet()
    }
  }

  /**
   * Reads a set of mapping lines like the following:
   *
   * ```
   *    180C; ; Map to nothing
   *    0041; 0061; Case map
   *    0390; 03B9 0308 0301; Case map
   * ```
   *
   * Each line maps from a single hexadecimal code point to zero or more hexadecimal code points.
   * Elements are delimited by semicolons with a comment at the end of the line.
   *
   * The file at [path] is opened through [fileSystem] and closed before this returns.
   */
  fun readCodePointMapping(path: Path): MappingListCodePointMapping {
    fileSystem.source(path).buffer().use { source ->
      return source.readCodePointMappings()
    }
  }
}

/** Select options with a single entry: index 0 is `;`. */
private val optionsSemicolon = Options.of(";".encodeUtf8())

/**
 * Select options for the byte that follows a code point in a range line.
 *
 * Indexes, in declaration order:
 *  * 0 is `;`
 *  * 1 is a newline
 *  * 2 is `-`
 */
private val optionsSemicolonOrNewlineOrDash = Options.of(
  ";".encodeUtf8(),
  "\n".encodeUtf8(),
  "-".encodeUtf8(),
)

/**
 * Parses this source, until it is exhausted, as lines that each hold a code point or a range of
 * code points.
 *
 * After optional leading spaces, each line is a hexadecimal code point followed by one of:
 *
 *  * `;` and a comment, which is skipped: a single code point.
 *  * a newline, or the end of the source: a single code point.
 *  * `-` and a hexadecimal end code point: an inclusive range. The rest of the line is skipped.
 *
 * @throws IOException if a code point is followed by anything else.
 */
internal fun BufferedSource.readCodePointSet(): RangeListCodePointSet {
  val result = mutableListOf<IntRange>()
  while (!exhausted()) {
    skipWhitespace()
    val startCodePoint = readHexadecimalUnsignedLong().toInt()
    skipWhitespace()
    val intRange = when (select(optionsSemicolonOrNewlineOrDash)) {
      0 -> {
        // ';' starts a trailing comment.
        skipRestOfLine()
        IntRange(startCodePoint, startCodePoint)
      }
      1 -> {
        // '\n' ends the line right after the code point.
        IntRange(startCodePoint, startCodePoint)
      }
      2 -> {
        // '-' introduces the inclusive end of the range.
        val endCodePoint = readHexadecimalUnsignedLong().toInt()
        skipRestOfLine()
        IntRange(startCodePoint, endCodePoint)
      }
      else -> {
        // Nothing matched. That is only acceptable when the last line has no trailing newline.
        if (!exhausted()) throw IOException("expected ';', '-' or a newline")
        IntRange(startCodePoint, startCodePoint)
      }
    }
    result += intRange
  }
  return RangeListCodePointSet(result)
}

/**
 * Parses this source, until it is exhausted, as lines that each map one code point to a string.
 *
 * After optional leading spaces, each line is a hexadecimal source code point, a `;`, zero or
 * more space-separated hexadecimal target code points, a second `;`, and a comment that is
 * skipped. The target code points are encoded together as one string; a line with no targets
 * maps to the empty string. A later line with the same source code point replaces the earlier
 * entry.
 *
 * @throws IOException if the source code point is not followed by `;`.
 */
internal fun BufferedSource.readCodePointMappings(): MappingListCodePointMapping {
  val result = mutableMapOf<Int, String>()
  // Scratch buffer reused for every line; readUtf8() below drains it.
  val target = Buffer()
  while (!exhausted()) {
    skipWhitespace()
    val sourceCodePoint = readHexadecimalUnsignedLong().toInt()
    skipWhitespace()
    if (select(optionsSemicolon) != 0) throw IOException("expected ';'")
    skipWhitespace()
    // Collect target code points until select() matches and consumes the closing ';'.
    while (select(optionsSemicolon) == -1) {
      val targetCodePoint = readHexadecimalUnsignedLong().toInt()
      target.writeUtf8CodePoint(targetCodePoint)
      skipWhitespace()
    }
    skipRestOfLine()
    result[sourceCodePoint] = target.readUtf8()
  }
  return MappingListCodePointMapping(result)
}

/**
 * Consumes consecutive `' '` bytes at the front of this source. Stops at the first byte that is
 * not a space, or when the source is exhausted. Only the space character is skipped.
 */
private fun BufferedSource.skipWhitespace() {
  val space = ' '.code.toByte()
  // exhausted() is checked first, so buffer[0] is only read when a byte is available.
  while (!exhausted() && buffer[0] == space) {
    skip(1L)
  }
}

/**
 * Consumes bytes up to and including the next `'\n'`. If no newline remains, consumes everything
 * that is left in this source.
 */
private fun BufferedSource.skipRestOfLine() {
  val newlineIndex = indexOf('\n'.code.toByte())
  if (newlineIndex == -1L) {
    // No newline was found, so exhaust this source.
    skip(buffer.size)
  } else {
    // The + 1 also drops the newline itself.
    skip(newlineIndex + 1)
  }
}

/**
 * A [CodePointMapping] backed by [mappings], a map from a code point to the string that replaces
 * it.
 */
class MappingListCodePointMapping(
  val mappings: Map<Int, String>
) : CodePointMapping {
  /** Returns the string mapped from [codePoint], or null if [mappings] has no entry for it. */
  override fun get(codePoint: Int) = mappings[codePoint]
}

/** A [CodePointSet] whose members are the code points covered by any of [ranges]. */
class RangeListCodePointSet(
  val ranges: List<IntRange>,
): CodePointSet {
  /** Returns true if at least one range in [ranges] contains [codePoint]. */
  override fun contains(codePoint: Int) = ranges.any { codePoint in it }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy