All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dimajix.shaded.guava.io.ByteSource Maven / Gradle / Ivy

There is a newer version: 1.2.0-synapse3.3-spark3.3-hadoop3.3
Show newest version
/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.dimajix.shaded.guava.io;

import static com.dimajix.shaded.guava.base.Preconditions.checkArgument;
import static com.dimajix.shaded.guava.base.Preconditions.checkNotNull;
import static com.dimajix.shaded.guava.io.ByteStreams.createBuffer;
import static com.dimajix.shaded.guava.io.ByteStreams.skipUpTo;

import com.dimajix.shaded.guava.annotations.Beta;
import com.dimajix.shaded.guava.annotations.GwtIncompatible;
import com.dimajix.shaded.guava.base.Ascii;
import com.dimajix.shaded.guava.base.Optional;
import com.dimajix.shaded.guava.collect.ImmutableList;
import com.dimajix.shaded.guava.hash.Funnels;
import com.dimajix.shaded.guava.hash.HashCode;
import com.dimajix.shaded.guava.hash.HashFunction;
import com.dimajix.shaded.guava.hash.Hasher;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a {@code ByteSource}
 * is not an open, stateful stream for input that can be read and closed. Instead, it is an
 * immutable supplier of {@code InputStream} instances.
 *
 * 

{@code ByteSource} provides two kinds of methods: * *

    *
  • Methods that return a stream: These methods should return a new, independent * instance each time they are called. The caller is responsible for ensuring that the * returned stream is closed. *
  • Convenience methods: These are implementations of common operations that are * typically implemented by opening a stream using one of the methods in the first category, * doing something and finally closing the stream that was opened. *
* *

Note: In general, {@code ByteSource} is intended to be used for "file-like" sources * that provide streams that are: * *

    *
  • Finite: Many operations, such as {@link #size()} and {@link #read()}, will either * block indefinitely or fail if the source creates an infinite stream. *
  • Non-destructive: A destructive stream will consume or otherwise alter the * bytes of the source as they are read from it. A source that provides such streams will not * be reusable, and operations that read from the stream (including {@link #size()}, in some * implementations) will prevent further operations from completing as expected. *
* * @since 14.0 * @author Colin Decker */ @GwtIncompatible @ElementTypesAreNonnullByDefault public abstract class ByteSource { /** Constructor for use by subclasses. */ protected ByteSource() {} /** * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source * as characters using the given {@link Charset}. * *

If {@link CharSource#asByteSource} is called on the returned source with the same charset, * the default implementation of this method will ensure that the original {@code ByteSource} is * returned, rather than round-trip encoding. Subclasses that override this method should behave * the same way. */ public CharSource asCharSource(Charset charset) { return new AsCharSource(charset); } /** * Opens a new {@link InputStream} for reading from this source. This method returns a new, * independent stream each time it is called. * *

The caller is responsible for ensuring that the returned stream is closed. * * @throws IOException if an I/O error occurs while opening the stream */ public abstract InputStream openStream() throws IOException; /** * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is * not required to be a {@link BufferedInputStream} in order to allow implementations to simply * delegate to {@link #openStream()} when the stream returned by that method does not benefit from * additional buffering (for example, a {@code ByteArrayInputStream}). This method returns a new, * independent stream each time it is called. * *

The caller is responsible for ensuring that the returned stream is closed. * * @throws IOException if an I/O error occurs while opening the stream * @since 15.0 (in 14.0 with return type {@link BufferedInputStream}) */ public InputStream openBufferedStream() throws IOException { InputStream in = openStream(); return (in instanceof BufferedInputStream) ? (BufferedInputStream) in : new BufferedInputStream(in); } /** * Returns a view of a slice of this byte source that is at most {@code length} bytes long * starting at the given {@code offset}. If {@code offset} is greater than the size of this * source, the returned source will be empty. If {@code offset + length} is greater than the size * of this source, the returned source will contain the slice starting at {@code offset} and * ending at the end of this source. * * @throws IllegalArgumentException if {@code offset} or {@code length} is negative */ public ByteSource slice(long offset, long length) { return new SlicedByteSource(offset, length); } /** * Returns whether the source has zero bytes. The default implementation first checks {@link * #sizeIfKnown}, returning true if it's known to be zero and false if it's known to be non-zero. * If the size is not known, it falls back to opening a stream and checking for EOF. * *

Note that, in cases where {@code sizeIfKnown} returns zero, it is possible that bytes * are actually available for reading. (For example, some special files may return a size of 0 * despite actually having content when read.) This means that a source may return {@code true} * from {@code isEmpty()} despite having readable content. * * @throws IOException if an I/O error occurs * @since 15.0 */ public boolean isEmpty() throws IOException { Optional sizeIfKnown = sizeIfKnown(); if (sizeIfKnown.isPresent()) { return sizeIfKnown.get() == 0L; } Closer closer = Closer.create(); try { InputStream in = closer.register(openStream()); return in.read() == -1; } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Returns the size of this source in bytes, if the size can be easily determined without actually * opening the data stream. * *

The default implementation returns {@link Optional#absent}. Some sources, such as a file, * may return a non-absent value. Note that in such cases, it is possible that this method * will return a different number of bytes than would be returned by reading all of the bytes (for * example, some special files may return a size of 0 despite actually having content when read). * *

Additionally, for mutable sources such as files, a subsequent read may return a different * number of bytes if the contents are changed. * * @since 19.0 */ @Beta public Optional sizeIfKnown() { return Optional.absent(); } /** * Returns the size of this source in bytes, even if doing so requires opening and traversing an * entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}. * *

The default implementation calls {@link #sizeIfKnown} and returns the value if present. If * absent, it will fall back to a heavyweight operation that will open a stream, read (or {@link * InputStream#skip(long) skip}, if possible) to the end of the stream and return the total number * of bytes that were read. * *

Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient * implementation, it is possible that this method will return a different number of bytes * than would be returned by reading all of the bytes (for example, some special files may return * a size of 0 despite actually having content when read). * *

In either case, for mutable sources such as files, a subsequent read may return a different * number of bytes if the contents are changed. * * @throws IOException if an I/O error occurs while reading the size of this source */ public long size() throws IOException { Optional sizeIfKnown = sizeIfKnown(); if (sizeIfKnown.isPresent()) { return sizeIfKnown.get(); } Closer closer = Closer.create(); try { InputStream in = closer.register(openStream()); return countBySkipping(in); } catch (IOException e) { // skip may not be supported... at any rate, try reading } finally { closer.close(); } closer = Closer.create(); try { InputStream in = closer.register(openStream()); return ByteStreams.exhaust(in); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** Counts the bytes in the given input stream using skip if possible. */ private long countBySkipping(InputStream in) throws IOException { long count = 0; long skipped; while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) { count += skipped; } return count; } /** * Copies the contents of this byte source to the given {@code OutputStream}. Does not close * {@code output}. * * @return the number of bytes copied * @throws IOException if an I/O error occurs while reading from this source or writing to {@code * output} */ @CanIgnoreReturnValue public long copyTo(OutputStream output) throws IOException { checkNotNull(output); Closer closer = Closer.create(); try { InputStream in = closer.register(openStream()); return ByteStreams.copy(in, output); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Copies the contents of this byte source to the given {@code ByteSink}. * * @return the number of bytes copied * @throws IOException if an I/O error occurs while reading from this source or writing to {@code * sink} */ @CanIgnoreReturnValue public long copyTo(ByteSink sink) throws IOException { checkNotNull(sink); Closer closer = Closer.create(); try { InputStream in = closer.register(openStream()); OutputStream out = closer.register(sink.openStream()); return ByteStreams.copy(in, out); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads the full contents of this byte source as a byte array. * * @throws IOException if an I/O error occurs while reading from this source */ public byte[] read() throws IOException { Closer closer = Closer.create(); try { InputStream in = closer.register(openStream()); Optional size = sizeIfKnown(); return size.isPresent() ? ByteStreams.toByteArray(in, size.get()) : ByteStreams.toByteArray(in); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads the contents of this byte source using the given {@code processor} to process bytes as * they are read. Stops when all bytes have been read or the consumer returns {@code false}. * Returns the result produced by the processor. * * @throws IOException if an I/O error occurs while reading from this source or if {@code * processor} throws an {@code IOException} * @since 16.0 */ @Beta @CanIgnoreReturnValue // some processors won't return a useful result public T read(ByteProcessor processor) throws IOException { checkNotNull(processor); Closer closer = Closer.create(); try { InputStream in = closer.register(openStream()); return ByteStreams.readBytes(in, processor); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Hashes the contents of this byte source using the given hash function. * * @throws IOException if an I/O error occurs while reading from this source */ public HashCode hash(HashFunction hashFunction) throws IOException { Hasher hasher = hashFunction.newHasher(); copyTo(Funnels.asOutputStream(hasher)); return hasher.hash(); } /** * Checks that the contents of this byte source are equal to the contents of the given byte * source. * * @throws IOException if an I/O error occurs while reading from this source or {@code other} */ public boolean contentEquals(ByteSource other) throws IOException { checkNotNull(other); byte[] buf1 = createBuffer(); byte[] buf2 = createBuffer(); Closer closer = Closer.create(); try { InputStream in1 = closer.register(openStream()); InputStream in2 = closer.register(other.openStream()); while (true) { int read1 = ByteStreams.read(in1, buf1, 0, buf1.length); int read2 = ByteStreams.read(in2, buf2, 0, buf2.length); if (read1 != read2 || !Arrays.equals(buf1, buf2)) { return false; } else if (read1 != buf1.length) { return true; } } } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * * @param sources the sources to concatenate * @return a {@code ByteSource} containing the concatenated data * @since 15.0 */ public static ByteSource concat(Iterable sources) { return new ConcatenatedByteSource(sources); } /** * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * *

Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method * is called. This will fail if the iterator is infinite and may cause problems if the iterator * eagerly fetches data for each source when iterated (rather than producing sources that only * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if * possible. * * @param sources the sources to concatenate * @return a {@code ByteSource} containing the concatenated data * @throws NullPointerException if any of {@code sources} is {@code null} * @since 15.0 */ public static ByteSource concat(Iterator sources) { return concat(ImmutableList.copyOf(sources)); } /** * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * * @param sources the sources to concatenate * @return a {@code ByteSource} containing the concatenated data * @throws NullPointerException if any of {@code sources} is {@code null} * @since 15.0 */ public static ByteSource concat(ByteSource... sources) { return concat(ImmutableList.copyOf(sources)); } /** * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}. * *

Note that the given byte array may be passed directly to methods on, for example, {@code * OutputStream} (when {@code copyTo(OutputStream)} is called on the resulting {@code * ByteSource}). This could allow a malicious {@code OutputStream} implementation to modify the * contents of the array, but provides better performance in the normal case. * * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}). */ public static ByteSource wrap(byte[] b) { return new ByteArrayByteSource(b); } /** * Returns an immutable {@link ByteSource} that contains no bytes. * * @since 15.0 */ public static ByteSource empty() { return EmptyByteSource.INSTANCE; } /** * A char source that reads bytes from this source and decodes them as characters using a charset. */ class AsCharSource extends CharSource { final Charset charset; AsCharSource(Charset charset) { this.charset = checkNotNull(charset); } @Override public ByteSource asByteSource(Charset charset) { if (charset.equals(this.charset)) { return ByteSource.this; } return super.asByteSource(charset); } @Override public Reader openStream() throws IOException { return new InputStreamReader(ByteSource.this.openStream(), charset); } @Override public String read() throws IOException { // Reading all the data as a byte array is more efficient than the default read() // implementation because: // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the // internal char array (hard to avoid using StringBuilder) // 2. we avoid extra copies into temporary buffers altogether // The downside is that this will cause us to store the file bytes in memory twice for a short // amount of time. return new String(ByteSource.this.read(), charset); } @Override public String toString() { return ByteSource.this.toString() + ".asCharSource(" + charset + ")"; } } /** A view of a subsection of the containing byte source. */ private final class SlicedByteSource extends ByteSource { final long offset; final long length; SlicedByteSource(long offset, long length) { checkArgument(offset >= 0, "offset (%s) may not be negative", offset); checkArgument(length >= 0, "length (%s) may not be negative", length); this.offset = offset; this.length = length; } @Override public InputStream openStream() throws IOException { return sliceStream(ByteSource.this.openStream()); } @Override public InputStream openBufferedStream() throws IOException { return sliceStream(ByteSource.this.openBufferedStream()); } private InputStream sliceStream(InputStream in) throws IOException { if (offset > 0) { long skipped; try { skipped = ByteStreams.skipUpTo(in, offset); } catch (Throwable e) { Closer closer = Closer.create(); closer.register(in); try { throw closer.rethrow(e); } finally { closer.close(); } } if (skipped < offset) { // offset was beyond EOF in.close(); return new ByteArrayInputStream(new byte[0]); } } return ByteStreams.limit(in, length); } @Override public ByteSource slice(long offset, long length) { checkArgument(offset >= 0, "offset (%s) may not be negative", offset); checkArgument(length >= 0, "length (%s) may not be negative", length); long maxLength = this.length - offset; return maxLength <= 0 ? ByteSource.empty() : ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength)); } @Override public boolean isEmpty() throws IOException { return length == 0 || super.isEmpty(); } @Override public Optional sizeIfKnown() { Optional optionalUnslicedSize = ByteSource.this.sizeIfKnown(); if (optionalUnslicedSize.isPresent()) { long unslicedSize = optionalUnslicedSize.get(); long off = Math.min(offset, unslicedSize); return Optional.of(Math.min(length, unslicedSize - off)); } return Optional.absent(); } @Override public String toString() { return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")"; } } private static class ByteArrayByteSource extends ByteSource { final byte[] bytes; final int offset; final int length; ByteArrayByteSource(byte[] bytes) { this(bytes, 0, bytes.length); } // NOTE: Preconditions are enforced by slice, the only non-trivial caller. ByteArrayByteSource(byte[] bytes, int offset, int length) { this.bytes = bytes; this.offset = offset; this.length = length; } @Override public InputStream openStream() { return new ByteArrayInputStream(bytes, offset, length); } @Override public InputStream openBufferedStream() throws IOException { return openStream(); } @Override public boolean isEmpty() { return length == 0; } @Override public long size() { return length; } @Override public Optional sizeIfKnown() { return Optional.of((long) length); } @Override public byte[] read() { return Arrays.copyOfRange(bytes, offset, offset + length); } @SuppressWarnings("CheckReturnValue") // it doesn't matter what processBytes returns here @Override @ParametricNullness public T read(ByteProcessor processor) throws IOException { processor.processBytes(bytes, offset, length); return processor.getResult(); } @Override public long copyTo(OutputStream output) throws IOException { output.write(bytes, offset, length); return length; } @Override public HashCode hash(HashFunction hashFunction) throws IOException { return hashFunction.hashBytes(bytes, offset, length); } @Override public ByteSource slice(long offset, long length) { checkArgument(offset >= 0, "offset (%s) may not be negative", offset); checkArgument(length >= 0, "length (%s) may not be negative", length); offset = Math.min(offset, this.length); length = Math.min(length, this.length - offset); int newOffset = this.offset + (int) offset; return new ByteArrayByteSource(bytes, newOffset, (int) length); } @Override public String toString() { return "ByteSource.wrap(" + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") + ")"; } } private static final class EmptyByteSource extends ByteArrayByteSource { static final EmptyByteSource INSTANCE = new EmptyByteSource(); EmptyByteSource() { super(new byte[0]); } @Override public CharSource asCharSource(Charset charset) { checkNotNull(charset); return CharSource.empty(); } @Override public byte[] read() { return bytes; // length is 0, no need to clone } @Override public String toString() { return "ByteSource.empty()"; } } private static final class ConcatenatedByteSource extends ByteSource { final Iterable sources; ConcatenatedByteSource(Iterable sources) { this.sources = checkNotNull(sources); } @Override public InputStream openStream() throws IOException { return new MultiInputStream(sources.iterator()); } @Override public boolean isEmpty() throws IOException { for (ByteSource source : sources) { if (!source.isEmpty()) { return false; } } return true; } @Override public Optional sizeIfKnown() { if (!(sources instanceof Collection)) { // Infinite Iterables can cause problems here. Of course, it's true that most of the other // methods on this class also have potential problems with infinite Iterables. But unlike // those, this method can cause issues even if the user is dealing with a (finite) slice() // of this source, since the slice's sizeIfKnown() method needs to know the size of the // underlying source to know what its size actually is. return Optional.absent(); } long result = 0L; for (ByteSource source : sources) { Optional sizeIfKnown = source.sizeIfKnown(); if (!sizeIfKnown.isPresent()) { return Optional.absent(); } result += sizeIfKnown.get(); if (result < 0) { // Overflow (or one or more sources that returned a negative size, but all bets are off in // that case) // Can't represent anything higher, and realistically there probably isn't anything that // can actually be done anyway with the supposed 8+ exbibytes of data the source is // claiming to have if we get here, so just stop. return Optional.of(Long.MAX_VALUE); } } return Optional.of(result); } @Override public long size() throws IOException { long result = 0L; for (ByteSource source : sources) { result += source.size(); if (result < 0) { // Overflow (or one or more sources that returned a negative size, but all bets are off in // that case) // Can't represent anything higher, and realistically there probably isn't anything that // can actually be done anyway with the supposed 8+ exbibytes of data the source is // claiming to have if we get here, so just stop. return Long.MAX_VALUE; } } return result; } @Override public String toString() { return "ByteSource.concat(" + sources + ")"; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy