All downloads are free. The search and download functionality uses the official Maven repository.

com.signalfx.shaded.apache.commons.io.input.ReaderInputStream Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.signalfx.shaded.apache.commons.io.input;

import static com.signalfx.shaded.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

import com.signalfx.shaded.apache.commons.io.Charsets;
import com.signalfx.shaded.apache.commons.io.IOUtils;
import com.signalfx.shaded.apache.commons.io.build.AbstractStreamBuilder;
import com.signalfx.shaded.apache.commons.io.charset.CharsetEncoders;

/**
 * {@link InputStream} implementation that reads a character stream from a {@link Reader} and transforms it to a byte stream using a specified charset encoding.
 * The stream is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset encodings supported by the JRE are handled correctly. In
 * particular for charsets such as UTF-16, the implementation ensures that one and only one byte order marker is produced.
 * 

* Since in general it is not possible to predict the number of characters to be read from the {@link Reader} to satisfy a read request on the * {@link ReaderInputStream}, all reads from the {@link Reader} are buffered. There is therefore no well defined correlation between the current position of the * {@link Reader} and that of the {@link ReaderInputStream}. This also implies that in general there is no need to wrap the underlying {@link Reader} in a * {@link java.io.BufferedReader}. *

*

* {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader}; in the following example, reading from {@code in2} * would return the same byte sequence as reading from {@code in} (provided that the initial byte sequence is legal with respect to the charset encoding): *

*

* To build an instance, use {@link Builder}. *

*
 * InputStream inputStream = ...
 * Charset cs = ...
 * InputStreamReader reader = new InputStreamReader(inputStream, cs);
 * ReaderInputStream in2 = ReaderInputStream.builder()
 *   .setReader(reader)
 *   .setCharset(cs)
 *   .get();
 * 
*

* {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter}, except that the control flow is reversed: both classes * transform a character stream into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream, while {@link ReaderInputStream} * pulls it from the underlying stream. *

*

* Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in * the design of the code. This class is typically used in situations where an existing API only accepts an {@link InputStream}, but where the most natural way * to produce the data is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation where this problem may appear is when * implementing the {@code javax.activation.DataSource} interface from the Java Activation Framework. *

*

* The {@link #available()} method of this class always returns 0. The methods {@link #mark(int)} and {@link #reset()} are not supported. *

*

* Instances of {@link ReaderInputStream} are not thread safe. *

* * @see Builder * @see org.apache.commons.io.output.WriterOutputStream * @since 2.0 */ public class ReaderInputStream extends InputStream { // @formatter:off /** * Builds a new {@link ReaderInputStream}. * *

* For example: *

*
{@code
     * ReaderInputStream s = ReaderInputStream.builder()
     *   .setPath(path)
     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder())
     *   .get();}
     * 
* * @see #get() * @since 2.12.0 */ // @formatter:on public static class Builder extends AbstractStreamBuilder { private CharsetEncoder charsetEncoder = newEncoder(getCharset()); /** * Builds a new {@link ReaderInputStream}. * *

* You must set input that supports {@link #getReader()}, otherwise, this method throws an exception. *

*

* This builder use the following aspects: *

*
    *
  • {@link #getReader()}
  • *
  • {@link #getBufferSize()}
  • *
  • {@link #getCharset()}
  • *
  • {@link CharsetEncoder}
  • *
* * @return a new instance. * @throws UnsupportedOperationException if the origin cannot provide a Reader. * @throws IllegalStateException if the {@code origin} is {@code null}. * @see #getReader() * @see CharsetEncoder * @see #getBufferSize() */ @SuppressWarnings("resource") @Override public ReaderInputStream get() throws IOException { return new ReaderInputStream(getReader(), charsetEncoder, getBufferSize()); } CharsetEncoder getCharsetEncoder() { return charsetEncoder; } @Override public Builder setCharset(final Charset charset) { super.setCharset(charset); charsetEncoder = newEncoder(getCharset()); return this; } /** * Sets the charset encoder. Assumes that the caller has configured the encoder. * * @param newEncoder the charset encoder, null resets to a default encoder. * @return this */ public Builder setCharsetEncoder(final CharsetEncoder newEncoder) { charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault())); super.setCharset(charsetEncoder.charset()); return this; } } /** * Constructs a new {@link Builder}. * * @return a new {@link Builder}. 
* @since 2.12.0 */ public static Builder builder() { return new Builder(); } static int checkMinBufferSize(final CharsetEncoder charsetEncoder, final int bufferSize) { final float minRequired = minBufferSize(charsetEncoder); if (bufferSize < minRequired) { throw new IllegalArgumentException(String.format("Buffer size %,d must be at least %s for a CharsetEncoder %s.", bufferSize, minRequired, charsetEncoder.charset().displayName())); } return bufferSize; } static float minBufferSize(final CharsetEncoder charsetEncoder) { return charsetEncoder.maxBytesPerChar() * 2; } private static CharsetEncoder newEncoder(final Charset charset) { // @formatter:off return Charsets.toCharset(charset).newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); // @formatter:on } private final Reader reader; private final CharsetEncoder charsetEncoder; /** * CharBuffer used as input for the decoder. It should be reasonably large as we read data from the underlying Reader into this buffer. */ private final CharBuffer encoderIn; /** * ByteBuffer used as output for the decoder. This buffer can be small as it is only used to transfer data from the decoder to the buffer provided by the * caller. */ private final ByteBuffer encoderOut; private CoderResult lastCoderResult; private boolean endOfInput; /** * Constructs a new {@link ReaderInputStream} that uses the default character encoding with a default input buffer size of * {@value IOUtils#DEFAULT_BUFFER_SIZE} characters. * * @param reader the target {@link Reader} * @deprecated Use {@link ReaderInputStream#builder()} instead */ @Deprecated public ReaderInputStream(final Reader reader) { this(reader, Charset.defaultCharset()); } /** * Constructs a new {@link ReaderInputStream} with a default input buffer size of {@value IOUtils#DEFAULT_BUFFER_SIZE} characters. * *

* The encoder created for the specified charset will use {@link CodingErrorAction#REPLACE} for malformed input and unmappable characters. *

* * @param reader the target {@link Reader} * @param charset the charset encoding * @deprecated Use {@link ReaderInputStream#builder()} instead, will be protected for subclasses. */ @Deprecated public ReaderInputStream(final Reader reader, final Charset charset) { this(reader, charset, IOUtils.DEFAULT_BUFFER_SIZE); } /** * Constructs a new {@link ReaderInputStream}. * *

* The encoder created for the specified charset will use {@link CodingErrorAction#REPLACE} for malformed input and unmappable characters. *

* * @param reader the target {@link Reader}. * @param charset the charset encoding. * @param bufferSize the size of the input buffer in number of characters. * @deprecated Use {@link ReaderInputStream#builder()} instead */ @Deprecated public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) { // @formatter:off this(reader, Charsets.toCharset(charset).newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE), bufferSize); // @formatter:on } /** * Constructs a new {@link ReaderInputStream}. * *

* This constructor does not call {@link CharsetEncoder#reset() reset} on the provided encoder. The caller of this constructor should do this when providing * an encoder which had already been in use. *

* * @param reader the target {@link Reader} * @param charsetEncoder the charset encoder * @since 2.1 * @deprecated Use {@link ReaderInputStream#builder()} instead */ @Deprecated public ReaderInputStream(final Reader reader, final CharsetEncoder charsetEncoder) { this(reader, charsetEncoder, IOUtils.DEFAULT_BUFFER_SIZE); } /** * Constructs a new {@link ReaderInputStream}. * *

* This constructor does not call {@link CharsetEncoder#reset() reset} on the provided encoder. The caller of this constructor should do this when providing * an encoder which had already been in use. *

* * @param reader the target {@link Reader} * @param charsetEncoder the charset encoder, null defaults to the default Charset encoder. * @param bufferSize the size of the input buffer in number of characters * @since 2.1 * @deprecated Use {@link ReaderInputStream#builder()} instead */ @Deprecated public ReaderInputStream(final Reader reader, final CharsetEncoder charsetEncoder, final int bufferSize) { this.reader = reader; this.charsetEncoder = CharsetEncoders.toCharsetEncoder(charsetEncoder); this.encoderIn = CharBuffer.allocate(checkMinBufferSize(this.charsetEncoder, bufferSize)); this.encoderIn.flip(); this.encoderOut = ByteBuffer.allocate(128); this.encoderOut.flip(); } /** * Constructs a new {@link ReaderInputStream} with a default input buffer size of {@value IOUtils#DEFAULT_BUFFER_SIZE} characters. * *

* The encoder created for the specified charset will use {@link CodingErrorAction#REPLACE} for malformed input and unmappable characters. *

* * @param reader the target {@link Reader} * @param charsetName the name of the charset encoding * @deprecated Use {@link ReaderInputStream#builder()} instead */ @Deprecated public ReaderInputStream(final Reader reader, final String charsetName) { this(reader, charsetName, IOUtils.DEFAULT_BUFFER_SIZE); } /** * Constructs a new {@link ReaderInputStream}. * *

* The encoder created for the specified charset will use {@link CodingErrorAction#REPLACE} for malformed input and unmappable characters. *

* * @param reader the target {@link Reader} * @param charsetName the name of the charset encoding, null maps to the default Charset. * @param bufferSize the size of the input buffer in number of characters * @deprecated Use {@link ReaderInputStream#builder()} instead */ @Deprecated public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) { this(reader, Charsets.toCharset(charsetName), bufferSize); } /** * Closes the stream. This method will cause the underlying {@link Reader} to be closed. * * @throws IOException if an I/O error occurs. */ @Override public void close() throws IOException { reader.close(); } /** * Fills the internal char buffer from the reader. * * @throws IOException If an I/O error occurs */ private void fillBuffer() throws IOException { if (endOfInput) { return; } if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { encoderIn.compact(); final int position = encoderIn.position(); // We don't use Reader#read(CharBuffer) here because it is more efficient // to write directly to the underlying char array (the default implementation // copies data to a temporary char array). final int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); if (c == EOF) { endOfInput = true; } else { encoderIn.position(position + c); } encoderIn.flip(); } encoderOut.compact(); lastCoderResult = charsetEncoder.encode(encoderIn, encoderOut, endOfInput); if (endOfInput) { lastCoderResult = charsetEncoder.flush(encoderOut); } if (lastCoderResult.isError()) { lastCoderResult.throwException(); } encoderOut.flip(); } /** * Gets the CharsetEncoder. * * @return the CharsetEncoder. */ CharsetEncoder getCharsetEncoder() { return charsetEncoder; } /** * Reads a single byte. * * @return either the byte read or {@code -1} if the end of the stream has been reached * @throws IOException if an I/O error occurs. 
*/ @Override public int read() throws IOException { for (;;) { if (encoderOut.hasRemaining()) { return encoderOut.get() & 0xFF; } fillBuffer(); if (endOfInput && !encoderOut.hasRemaining()) { return EOF; } } } /** * Reads the specified number of bytes into an array. * * @param b the byte array to read into * @return the number of bytes read or {@code -1} if the end of the stream has been reached * @throws IOException if an I/O error occurs. */ @Override public int read(final byte[] b) throws IOException { return read(b, 0, b.length); } /** * Reads the specified number of bytes into an array. * * @param array the byte array to read into * @param off the offset to start reading bytes into * @param len the number of bytes to read * @return the number of bytes read or {@code -1} if the end of the stream has been reached * @throws IOException if an I/O error occurs. */ @Override public int read(final byte[] array, int off, int len) throws IOException { Objects.requireNonNull(array, "array"); if (len < 0 || off < 0 || off + len > array.length) { throw new IndexOutOfBoundsException("Array size=" + array.length + ", offset=" + off + ", length=" + len); } int read = 0; if (len == 0) { return 0; // Always return 0 if len == 0 } while (len > 0) { if (encoderOut.hasRemaining()) { // Data from the last read not fully copied final int c = Math.min(encoderOut.remaining(), len); encoderOut.get(array, off, c); off += c; len -= c; read += c; } else if (endOfInput) { // Already reach EOF in the last read break; } else { // Read again fillBuffer(); } } return read == 0 && endOfInput ? EOF : read; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy