All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.signalfx.shaded.apache.commons.io.input.CharSequenceInputStream Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.signalfx.shaded.apache.commons.io.input;

import static com.signalfx.shaded.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

import com.signalfx.shaded.apache.commons.io.Charsets;
import com.signalfx.shaded.apache.commons.io.IOUtils;
import com.signalfx.shaded.apache.commons.io.build.AbstractStreamBuilder;
import com.signalfx.shaded.apache.commons.io.charset.CharsetEncoders;
import com.signalfx.shaded.apache.commons.io.function.Uncheck;

/**
 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
 * 

* Note: Supports {@link #mark(int)} and {@link #reset()}. *

*

* To build an instance, use {@link Builder}. *

* * @see Builder * @since 2.2 */ public class CharSequenceInputStream extends InputStream { //@formatter:off /** * Builds a new {@link CharSequenceInputStream}. * *

* For example: *

*

Using a Charset

*
{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharset(Charset.defaultCharset())
     *   .get();}
     * 
*

Using a CharsetEncoder

*
{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
     *     .onMalformedInput(CodingErrorAction.REPLACE)
     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
     *   .get();}
     * 
* * @see #get() * @since 2.13.0 */ //@formatter:on public static class Builder extends AbstractStreamBuilder { private CharsetEncoder charsetEncoder = newEncoder(getCharset()); /** * Builds a new {@link CharSequenceInputStream}. *

* You must set input that supports {@link #getCharSequence()}, otherwise, this method throws an exception. *

*

* This builder use the following aspects: *

*
    *
  • {@link #getCharSequence()}
  • *
  • {@link #getBufferSize()}
  • *
  • {@link CharsetEncoder}
  • *
* * @return a new instance. * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. */ @Override public CharSequenceInputStream get() { return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder)); } CharsetEncoder getCharsetEncoder() { return charsetEncoder; } @Override public Builder setCharset(final Charset charset) { super.setCharset(charset); charsetEncoder = newEncoder(getCharset()); return this; } /** * Sets the charset encoder. Assumes that the caller has configured the encoder. * * @param newEncoder the charset encoder. * @return this * @since 2.13.0 */ public Builder setCharsetEncoder(final CharsetEncoder newEncoder) { charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault())); super.setCharset(charsetEncoder.charset()); return this; } } private static final int NO_MARK = -1; /** * Constructs a new {@link Builder}. * * @return a new {@link Builder}. * @since 2.12.0 */ public static Builder builder() { return new Builder(); } private static CharsetEncoder newEncoder(final Charset charset) { // @formatter:off return Charsets.toCharset(charset).newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); // @formatter:on } private final ByteBuffer bBuf; private int bBufMark; // position in bBuf private final CharBuffer cBuf; private int cBufMark; // position in cBuf private final CharsetEncoder charsetEncoder; /** * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. * * @param cs the input character sequence. * @param charset the character set name to use. * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} */ @Deprecated public CharSequenceInputStream(final CharSequence cs, final Charset charset) { this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); } /** * Constructs a new instance. * * @param cs the input character sequence. * @param charset the character set name to use, null maps to the default Charset. * @param bufferSize the buffer size to use. * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} */ @Deprecated public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) { // @formatter:off this(cs, bufferSize, newEncoder(charset)); // @formatter:on } private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) { this.charsetEncoder = charsetEncoder; // Ensure that buffer is long enough to hold a complete character this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize)); this.bBuf.flip(); this.cBuf = CharBuffer.wrap(cs); this.cBufMark = NO_MARK; this.bBufMark = NO_MARK; try { fillBuffer(); } catch (final CharacterCodingException ex) { // Reset everything without filling the buffer // so the same exception can be thrown again later. this.bBuf.clear(); this.bBuf.flip(); this.cBuf.rewind(); } } /** * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. * * @param cs the input character sequence. * @param charset the character set name to use. * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} */ @Deprecated public CharSequenceInputStream(final CharSequence cs, final String charset) { this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); } /** * Constructs a new instance. * * @param cs the input character sequence. * @param charset the character set name to use, null maps to the default Charset. * @param bufferSize the buffer size to use. * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} */ @Deprecated public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) { this(cs, Charsets.toCharset(charset), bufferSize); } /** * Gets a lower bound on the number of bytes remaining in the byte stream. * * @return the count of bytes that can be read without blocking (or returning EOF). * @throws IOException if an error occurs (probably not possible). */ @Override public int available() throws IOException { return this.bBuf.remaining(); } @Override public void close() throws IOException { // noop } /** * Fills the byte output buffer from the input char buffer. * * @throws CharacterCodingException * an error encoding data. */ private void fillBuffer() throws CharacterCodingException { this.bBuf.compact(); final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true); if (result.isError()) { result.throwException(); } this.bBuf.flip(); } /** * Gets the CharsetEncoder. * * @return the CharsetEncoder. */ CharsetEncoder getCharsetEncoder() { return charsetEncoder; } /** * {@inheritDoc} * @param readLimit max read limit (ignored). */ @Override public synchronized void mark(final int readLimit) { this.cBufMark = this.cBuf.position(); this.bBufMark = this.bBuf.position(); this.cBuf.mark(); this.bBuf.mark(); // It would be nice to be able to use mark & reset on the cBuf and bBuf; // however the bBuf is re-used so that won't work } @Override public boolean markSupported() { return true; } @Override public int read() throws IOException { for (;;) { if (this.bBuf.hasRemaining()) { return this.bBuf.get() & 0xFF; } fillBuffer(); if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { return EOF; } } } @Override public int read(final byte[] b) throws IOException { return read(b, 0, b.length); } @Override public int read(final byte[] array, int off, int len) throws IOException { Objects.requireNonNull(array, "array"); if (len < 0 || off + len > array.length) { throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len); } if (len == 0) { return 0; // must return 0 for zero length read } if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { return EOF; } int bytesRead = 0; while (len > 0) { if (this.bBuf.hasRemaining()) { final int chunk = Math.min(this.bBuf.remaining(), len); this.bBuf.get(array, off, chunk); off += chunk; len -= chunk; bytesRead += chunk; } else { fillBuffer(); if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { break; } } } return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead; } @Override public synchronized void reset() throws IOException { // // This is not the most efficient implementation, as it re-encodes from the beginning. // // Since the bBuf is re-used, in general it's necessary to re-encode the data. // // It should be possible to apply some optimizations however: // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to // restart from there. // if (this.cBufMark != NO_MARK) { // if cBuf is at 0, we have not started reading anything, so skip re-encoding if (this.cBuf.position() != 0) { this.charsetEncoder.reset(); this.cBuf.rewind(); this.bBuf.rewind(); this.bBuf.limit(0); // rewind does not clear the buffer while (this.cBuf.position() < this.cBufMark) { this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing) this.bBuf.limit(0); fillBuffer(); } } if (this.cBuf.position() != this.cBufMark) { throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " + "expected=" + this.cBufMark); } this.bBuf.position(this.bBufMark); this.cBufMark = NO_MARK; this.bBufMark = NO_MARK; } mark(0); } @Override public long skip(long n) throws IOException { // // This could be made more efficient by using position to skip within the current buffer. // long skipped = 0; while (n > 0 && available() > 0) { this.read(); n--; skipped++; } return skipped; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy