All Downloads are FREE. Search and download functionalities are using the official Maven repository.

play.api.libs.iteratee.CharEncoding.scala Maven / Gradle / Ivy

The newest version!
package play.api.libs.iteratee

import java.io.{ ByteArrayOutputStream, StringWriter }
import java.nio.{ ByteBuffer, CharBuffer }
import java.nio.charset._
import play.api.libs.iteratee.Execution.defaultExecutionContext
import scala.annotation.tailrec

/**
 * [[Enumeratee]]s for converting chunks of bytes to chunks of chars, and vice-versa.
 *
 * These methods can handle cases where characters straddle a chunk boundary, and redistribute the data.
 * An erroneous encoding or an incompatible decoding causes a [[Step.Error]].
 */
object CharEncoding {

  private trait Coder[From, To] extends Enumeratee[From, To] {
    private type Inner[A] = Iteratee[To, A]

    protected def empty: From

    protected def code(data: From, last: Boolean): Either[CoderResult, (To, From)]

    protected def concat(a: From, b: From): From

    private def step[A](initial: From = empty)(it: Inner[A]): K[From, Inner[A]] = {
      case in @ Input.El(chars) =>
        it.pureFlatFold[From, Inner[A]] {
          case Step.Cont(k) =>
            code(concat(initial, chars), false).fold({ result =>
              Error(s"coding error: $result", in)
            }, {
              case (bytes, remaining) =>
                val newIt = Iteratee.flatten(it.feed(Input.El(bytes)))
                Cont(step(remaining)(newIt))
            })
          case _ => Done(it)
        }(defaultExecutionContext)
      case in @ Input.Empty =>
        val newIt = Iteratee.flatten(it.feed(in))
        Cont(step(initial)(newIt))
      case in @ Input.EOF =>
        code(initial, true).fold({ result =>
          Error(s"coding error: $result", in)
        }, {
          case (string, remaining) =>
            val newIt = Iteratee.flatten(it.feed(Input.El(string)).flatMap(_.feed(in))(defaultExecutionContext))
            Done(newIt)
        })
    }

    def applyOn[A](inner: Inner[A]) = Cont(step()(inner))
  }

  def decode(charset: Charset): Enumeratee[Array[Byte], String] = new Coder[Array[Byte], String] {
    protected val empty = Array[Byte]()

    protected def concat(a: Array[Byte], b: Array[Byte]) = a ++ b

    protected def code(bytes: Array[Byte], last: Boolean) = {
      val decoder = charset.newDecoder

      val byteBuffer = ByteBuffer.wrap(bytes)
      // at least 2, for UTF-32
      val charBuffer = CharBuffer.allocate(2 max math.ceil(bytes.length * decoder.averageCharsPerByte).toInt)
      val out = new StringWriter

      @tailrec
      def process(charBuffer: CharBuffer): CoderResult = {
        val result = decoder.decode(byteBuffer, charBuffer, true)
        out.write(charBuffer.array, 0, charBuffer.position)
        if (result.isOverflow) {
          if (charBuffer.position == 0) {
            // shouldn't happen for most encodings
            process(CharBuffer.allocate(2 * charBuffer.capacity))
          } else {
            charBuffer.clear()
            process(charBuffer)
          }
        } else {
          result
        }
      }
      val result = process(charBuffer)

      if (result.isUnmappable || last && result.isMalformed) {
        Left(result)
      } else {
        val remaining = if (result.isError) bytes.drop(byteBuffer.position) else empty
        Right((out.toString, remaining))
      }
    }
  }

  /**
   * @throws UnsupportedCharsetException
   */
  def decode(charset: String): Enumeratee[Array[Byte], String] = decode(Charset.forName(charset))

  def encode(charset: Charset): Enumeratee[String, Array[Byte]] = new Coder[String, Array[Byte]] {
    protected def empty = ""

    protected def concat(a: String, b: String) = a + b

    protected def code(chars: String, last: Boolean) = {
      val encoder = charset.newEncoder

      val charBuffer = CharBuffer.wrap(chars)
      // at least 6, for UTF-8
      val byteBuffer = ByteBuffer.allocate(6 max math.ceil(chars.length * encoder.averageBytesPerChar).toInt)
      val out = new ByteArrayOutputStream
      @tailrec
      def process(byteBuffer: ByteBuffer): CoderResult = {
        val result = encoder.encode(charBuffer, byteBuffer, true)
        out.write(byteBuffer.array, 0, byteBuffer.position)
        if (result.isOverflow) {
          if (byteBuffer.position == 0) {
            // shouldn't happen for most encodings
            process(ByteBuffer.allocate(2 * byteBuffer.capacity))
          } else {
            byteBuffer.clear()
            process(byteBuffer)
          }
        } else {
          result
        }
      }
      val result = process(byteBuffer)
      if (result.isUnmappable || last && result.isMalformed) {
        Left(result)
      } else {
        val remaining = if (result.isError) chars.drop(charBuffer.position) else ""
        val bytes = out.toByteArray
        val bytesWithoutBom = if (charset.name.startsWith("UTF-") && bytes.length >= 2 && bytes(0) == 0xfe.toByte && bytes(1) == 0xff.toByte) {
          bytes.drop(2)
        } else {
          bytes
        }
        Right((bytesWithoutBom, remaining))
      }
    }

  }

  /**
   * @throws UnsupportedCharsetException
   */
  def encode(charset: String): Enumeratee[String, Array[Byte]] = encode(Charset.forName(charset))

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy