All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.scalatra.util.io.FileCharset.scala Maven / Gradle / Ivy

package org.scalatra.util.io

import java.io.File
import java.nio.charset.Charset

import org.mozilla.universalchardet.UniversalDetector
import org.slf4j.LoggerFactory

import scala.io.Codec

/**
 * Detects the character encoding of a file or of an in-memory byte array
 * using juniversalchardet's `UniversalDetector`.
 */
object FileCharset {

  // Imported locally so this object is self-contained.
  import scala.util.control.NonFatal

  @transient private[this] val logger = LoggerFactory.getLogger(getClass)

  /** Maximum number of leading bytes of an array that are handed to the detector. */
  private val CheckByteLength = 8192

  /**
   * Detects the charset of `file`.
   *
   * Detection is best-effort: any non-fatal failure is logged at WARN level
   * and `Codec.defaultCharsetCodec.charSet` is returned instead.
   *
   * @param file the file whose encoding should be detected
   * @return the detected charset, or a fallback charset (see [[getCharset]] and above)
   */
  def apply(file: File): Charset = {
    try {
      getCharset(UniversalDetector.detectCharset(file))
    } catch {
      // NonFatal (rather than Throwable) lets VM errors, interrupts and
      // control-flow throwables propagate instead of being swallowed here.
      case NonFatal(t) =>
        logger.warn("Failed to detect charset for file: " + file.getPath + ".", t)
        Codec.defaultCharsetCodec.charSet
    }
  }

  /**
   * Maps a charset name reported by the detector to a `Charset`.
   *
   * @param cs detected charset name; may be `null` when nothing was detected
   * @return `Codec.fileEncodingCodec.charSet` when `cs` is `null`, blank or
   *         exactly `"US-ASCII"`; otherwise `Charset.forName(cs)`
   */
  private[this] def getCharset(cs: String): Charset = {
    // US-ASCII is compatible with UTF-8, so if the result is US-ASCII, replace it with UTF-8.
    // Codec.fileEncodingCodec points to UTF-8
    // unless explicitly specified in the form `JAVA_OPTS="-Dfile.encoding=Foo"`.
    Option(cs)
      .filter(name => name != "US-ASCII" && !name.trim().isEmpty)
      .fold(Codec.fileEncodingCodec.charSet)(name => Charset.forName(name))
  }

  /**
   * Detects the charset of the given bytes, inspecting at most the first
   * `CheckByteLength` bytes.
   *
   * Unlike the `File` overload, no fallback is applied if `Charset.forName`
   * rejects the detected name; that exception propagates to the caller.
   *
   * @param barr the bytes to inspect
   * @return the detected charset, or the fallback chosen by [[getCharset]]
   */
  def apply(barr: Array[Byte]): Charset = {
    val detector = new UniversalDetector(null)

    // Feed the inspected prefix exactly once. The detector is stateful, so
    // re-feeding overlapping prefixes would present it with a corrupted byte
    // stream and would also skip the final byte of the inspected range.
    val length = math.min(barr.length, CheckByteLength)
    if (length > 0) detector.handleData(barr, 0, length)
    detector.dataEnd()

    getCharset(detector.getDetectedCharset)
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy