fm.common.IOUtils.scala Maven / Gradle / Ivy
/*
* Copyright 2014 Frugal Mechanic (http://frugalmechanic.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package fm.common
import java.io._
import java.nio.charset.Charset
import org.apache.commons.io.{IOUtils => ApacheIOUtils}
import org.mozilla.universalchardet.UniversalDetector
/**
* Collection of IO Utilities. Some implemented via Apache Commons IO
*/
object IOUtils {
/**
* If this is already a BufferedInputStream return this otherwise wrap in a BufferedInputStream
*/
def toBufferedInputStream(input: InputStream): BufferedInputStream = input match {
case bufferedInput: BufferedInputStream => bufferedInput
case _ => new BufferedInputStream(input)
}
/**
* If this is already a BufferedOutputStream return this otherwise wrap in a BufferedOutputStream
*/
def toBufferedOutputStream(output: OutputStream): BufferedOutputStream = output match {
case bufferedOutput: BufferedOutputStream => bufferedOutput
case _ => new BufferedOutputStream(output)
}
/**
* If this is already a BufferedReader return this otherwise wrap in a BufferedReader
*/
def toBufferedReader(input: Reader): BufferedReader = input match {
case bufferedInput: BufferedReader => bufferedInput
case _ => new BufferedReader(input)
}
/**
* If this is already a BufferedWriter return this otherwise wrap in a BufferedWriter
*/
def toBufferedWriter(output: Writer): BufferedWriter = output match {
case bufferedOutput: BufferedWriter => bufferedOutput
case _ => new BufferedWriter(output)
}
/**
* Copy bytes from an Resource[InputStream] to an Resource[OutputStream].
*
* This method buffers the input internally, so there is no need to use a BufferedInputStream.
*/
def copy(input: Resource[InputStream], output: Resource[OutputStream]): Int = Resource.use(input, output) { ApacheIOUtils.copy(_, _) }
/**
* Copy bytes from an InputStream to an OutputStream.
*
* This method buffers the input internally, so there is no need to use a BufferedInputStream.
*/
def copy(input: InputStream, output: OutputStream): Int = ApacheIOUtils.copy(input, output)
/**
* Copy chars from a Reader to a Writer.
*
* This method buffers the input internally, so there is no need to use a BufferedReader.
*/
def copy(input: Reader, output: Writer): Int = ApacheIOUtils.copy(input, output)
/**
* Read bytes from an input stream. This implementation guarantees that it will read as many bytes as possible before giving up;
* this IS NOT ALWAYS the case for subclasses of InputStream.
*/
def read(input: InputStream, buffer: Array[Byte]): Int = ApacheIOUtils.read(input, buffer)
/**
* Read bytes from an input stream. This implementation guarantees that it will read as many bytes as possible before giving up;
* this IS NOT ALWAYS the case for subclasses of InputStream.
*/
def read(input: InputStream, buffer: Array[Byte], offset: Int, length: Int): Int = ApacheIOUtils.read(input, buffer, offset, length)
/**
* Read characters from an input character stream. This implementation guarantees that it will read as many characters as possible before giving up;
* this IS NOT ALWAYS the case for subclasses of Reader.
*/
def read(input: Reader, buffer: Array[Char]): Int = ApacheIOUtils.read(input, buffer)
/**
* Read characters from an input character stream. This implementation guarantees that it will read as many characters as possible before giving up;
* this IS NOT ALWAYS the case for subclasses of Reader.
*/
def read(input: Reader, buffer: Array[Char], offset: Int, length: Int): Int = ApacheIOUtils.read(input, buffer, offset, length)
/**
* Skip bytes from an input byte stream. This implementation guarantees that it will read as many bytes as possible before giving up;
* this IS NOT ALWAYS the case for subclasses of Reader.
*/
def skip(input: InputStream, toSkip: Long): Long = ApacheIOUtils.skip(input, toSkip)
/**
* Skip characters from an input character stream. This implementation guarantees that it will read as many characters as possible before giving up;
* this IS NOT ALWAYS the case for subclasses of Reader.
*/
def skip(input: Reader, toSkip: Long): Long = ApacheIOUtils.skip(input, toSkip)
/**
* Get the contents of an InputStream as a byte[].
*
* This method buffers the input internally, so there is no need to use a BufferedInputStream.
*/
def toByteArray(input: InputStream): Array[Byte] = ApacheIOUtils.toByteArray(input)
/**
* Get the contents of a Reader as a character array.
*
* This method buffers the input internally, so there is no need to use a BufferedReader.
*/
def toCharArray(input: Reader): Array[Char] = ApacheIOUtils.toCharArray(input)
/**
* Get the contents of a Reader as a String.
*
* This method buffers the input internally, so there is no need to use a BufferedReader.
*/
def toString(input: Reader): String = ApacheIOUtils.toString(input)
/**
* Attempt to detect the charset of the InputStream using the XML Detector or the Universal Detector
*/
def detectCharset(is: InputStream, useMarkReset: Boolean): Option[Charset] = detectCharsetName(is, useMarkReset).map{ CharsetUtil.forName }
/**
* Attempt to detect the charset of the InputStream using the XML Detector or the Universal Detector
*/
def detectCharsetName(is: InputStream, useMarkReset: Boolean): Option[String] = {
XMLUtil.detectXMLCharsetName(is, useMarkReset) orElse detectUniversalCharsetName(is, useMarkReset)
}
/**
* Attempt to detect the charset of the InputStream using org.mozilla.universalchardet.UniversalDetector
*/
def detectUniversalCharset(is: InputStream, useMarkReset: Boolean): Option[Charset] = detectUniversalCharsetName(is, useMarkReset).map{ CharsetUtil.forName }
/**
* Attempt to detect the charset of the InputStream using org.mozilla.universalchardet.UniversalDetector
*/
def detectUniversalCharsetName(is: InputStream, useMarkReset: Boolean): Option[String] = {
if (useMarkReset) require(is.markSupported, "detectCharsetName required mark/reset support! Try wrapping in a BufferedInputStream")
val bufSize: Int = 8192
if (useMarkReset) is.mark(bufSize)
val buf: Array[Byte] = new Array[Byte](bufSize)
val detector: UniversalDetector = new UniversalDetector(null)
// Bytes read per-read (can be <= bufSize)
var nread: Int = read(is, buf)
if (useMarkReset) {
// We can only feed as much data as we've mark()'ed
detector.handleData(buf, 0, nread)
} else {
// We can read as much data as we need
// NOTE!!! Apache IOUtils.read will read as much data as possible and will not return -1 for EOF!!!
while (nread > 0) {
detector.handleData(buf, 0, nread)
nread = if (detector.isDone) -1 else read(is, buf)
}
}
detector.dataEnd()
val encoding: String = detector.getDetectedCharset()
detector.reset()
if (useMarkReset) is.reset()
if(null != encoding && Charset.isSupported(encoding)) Some(encoding) else None
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy