com.google.common.io.CharSource Maven / Gradle / Ivy
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.io;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;
/**
* A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
* CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
* it is an immutable supplier of {@code Reader} instances.
*
* {@code CharSource} provides two kinds of methods:
*
*
* - Methods that return a reader: These methods should return a new, independent
* instance each time they are called. The caller is responsible for ensuring that the
* returned reader is closed.
*
- Convenience methods: These are implementations of common operations that are
* typically implemented by opening a reader using one of the methods in the first category,
* doing something and finally closing the reader that was opened.
*
*
* Several methods in this class, such as {@link #readLines()}, break the contents of the source
* into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
* \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
* be an empty line at the end if the contents are terminated with a line separator.
*
*
Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
* encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
*
* @since 14.0
* @author Colin Decker
*/
@GwtIncompatible
public abstract class CharSource {
/** Constructor for use by subclasses. */
protected CharSource() {}
/**
* Returns a {@link ByteSource} view of this char source that encodes chars read from this source
* as bytes using the given {@link Charset}.
*
*
If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
* the default implementation of this method will ensure that the original {@code CharSource} is
* returned, rather than round-trip encoding. Subclasses that override this method should behave
* the same way.
*
* @since 20.0
*/
@Beta
public ByteSource asByteSource(Charset charset) {
return new AsByteSource(charset);
}
/**
* Opens a new {@link Reader} for reading from this source. This method returns a new, independent
* reader each time it is called.
*
*
The caller is responsible for ensuring that the returned reader is closed.
*
* @throws IOException if an I/O error occurs while opening the reader
*/
public abstract Reader openStream() throws IOException;
/**
* Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
* independent reader each time it is called.
*
*
The caller is responsible for ensuring that the returned reader is closed.
*
* @throws IOException if an I/O error occurs while of opening the reader
*/
public BufferedReader openBufferedStream() throws IOException {
Reader reader = openStream();
return (reader instanceof BufferedReader)
? (BufferedReader) reader
: new BufferedReader(reader);
}
/**
* Opens a new {@link Stream} for reading text one line at a time from this source. This method
* returns a new, independent stream each time it is called.
*
*
The returned stream is lazy and only reads from the source in the terminal operation. If an
* I/O error occurs while the stream is reading from the source or when the stream is closed, an
* {@link UncheckedIOException} is thrown.
*
*
Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
* text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
* \n}. If the source's content does not end in a line termination sequence, it is treated as if
* it does.
*
*
The caller is responsible for ensuring that the returned stream is closed. For example:
*
*
{@code
* try (Stream lines = source.lines()) {
* lines.map(...)
* .filter(...)
* .forEach(...);
* }
* }
*
* @throws IOException if an I/O error occurs while opening the stream
* @since 22.0
*/
@Beta
public Stream lines() throws IOException {
BufferedReader reader = openBufferedStream();
return reader
.lines()
.onClose(
() -> {
try {
reader.close();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
}
/**
* Returns the size of this source in chars, if the size can be easily determined without actually
* opening the data stream.
*
* The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
* CharSequence}, may return a non-absent value. Note that in such cases, it is possible
* that this method will return a different number of chars than would be returned by reading all
* of the chars.
*
*
Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
* return a different number of chars if the contents are changed.
*
* @since 19.0
*/
@Beta
public Optional lengthIfKnown() {
return Optional.absent();
}
/**
* Returns the length of this source in chars, even if doing so requires opening and traversing an
* entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
*
* The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
* absent, it will fall back to a heavyweight operation that will open a stream, {@link
* Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
* were skipped.
*
*
Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
* implementation, it is possible that this method will return a different number of chars
* than would be returned by reading all of the chars.
*
*
In either case, for mutable sources such as files, a subsequent read may return a different
* number of chars if the contents are changed.
*
* @throws IOException if an I/O error occurs while reading the length of this source
* @since 19.0
*/
@Beta
public long length() throws IOException {
Optional lengthIfKnown = lengthIfKnown();
if (lengthIfKnown.isPresent()) {
return lengthIfKnown.get();
}
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return countBySkipping(reader);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
private long countBySkipping(Reader reader) throws IOException {
long count = 0;
long read;
while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
count += read;
}
return count;
}
/**
* Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
* Does not close {@code appendable} if it is {@code Closeable}.
*
* @return the number of characters copied
* @throws IOException if an I/O error occurs while reading from this source or writing to {@code
* appendable}
*/
public long copyTo(Appendable appendable) throws IOException {
checkNotNull(appendable);
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return CharStreams.copy(reader, appendable);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Copies the contents of this source to the given sink.
*
* @return the number of characters copied
* @throws IOException if an I/O error occurs while reading from this source or writing to {@code
* sink}
*/
public long copyTo(CharSink sink) throws IOException {
checkNotNull(sink);
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
Writer writer = closer.register(sink.openStream());
return CharStreams.copy(reader, writer);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads the contents of this source as a string.
*
* @throws IOException if an I/O error occurs while reading from this source
*/
public String read() throws IOException {
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return CharStreams.toString(reader);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads the first line of this source as a string. Returns {@code null} if this source is empty.
*
* Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
* text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
* \n}. If the source's content does not end in a line termination sequence, it is treated as if
* it does.
*
* @throws IOException if an I/O error occurs while reading from this source
*/
public String readFirstLine() throws IOException {
Closer closer = Closer.create();
try {
BufferedReader reader = closer.register(openBufferedStream());
return reader.readLine();
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads all the lines of this source as a list of strings. The returned list will be empty if
* this source is empty.
*
*
Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
* text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
* \n}. If the source's content does not end in a line termination sequence, it is treated as if
* it does.
*
* @throws IOException if an I/O error occurs while reading from this source
*/
public ImmutableList readLines() throws IOException {
Closer closer = Closer.create();
try {
BufferedReader reader = closer.register(openBufferedStream());
List result = Lists.newArrayList();
String line;
while ((line = reader.readLine()) != null) {
result.add(line);
}
return ImmutableList.copyOf(result);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads lines of text from this source, processing each line as it is read using the given {@link
* LineProcessor processor}. Stops when all lines have been processed or the processor returns
* {@code false} and returns the result produced by the processor.
*
* Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
* text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
* \n}. If the source's content does not end in a line termination sequence, it is treated as if
* it does.
*
* @throws IOException if an I/O error occurs while reading from this source or if {@code
* processor} throws an {@code IOException}
* @since 16.0
*/
@Beta
// some processors won't return a useful result
public T readLines(LineProcessor processor) throws IOException {
checkNotNull(processor);
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return CharStreams.readLines(reader, processor);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads all lines of text from this source, running the given {@code action} for each line as it
* is read.
*
* Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
* text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
* \n}. If the source's content does not end in a line termination sequence, it is treated as if
* it does.
*
* @throws IOException if an I/O error occurs while reading from this source or if {@code action}
* throws an {@code UncheckedIOException}
* @since 22.0
*/
@Beta
public void forEachLine(Consumer super String> action) throws IOException {
try (Stream lines = lines()) {
// The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
lines.forEachOrdered(action);
} catch (UncheckedIOException e) {
throw e.getCause();
}
}
/**
* Returns whether the source has zero chars. The default implementation first checks {@link
* #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
* non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
*
* Note that, in cases where {@code lengthIfKnown} returns zero, it is possible that
* chars are actually available for reading. This means that a source may return {@code true} from
* {@code isEmpty()} despite having readable content.
*
* @throws IOException if an I/O error occurs
* @since 15.0
*/
public boolean isEmpty() throws IOException {
Optional lengthIfKnown = lengthIfKnown();
if (lengthIfKnown.isPresent()) {
return lengthIfKnown.get() == 0L;
}
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return reader.read() == -1;
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
* Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @since 15.0
*/
public static CharSource concat(Iterable extends CharSource> sources) {
return new ConcatenatedCharSource(sources);
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
*
Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
*
Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
* is called. This will fail if the iterator is infinite and may cause problems if the iterator
* eagerly fetches data for each source when iterated (rather than producing sources that only
* load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
* possible.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @throws NullPointerException if any of {@code sources} is {@code null}
* @since 15.0
*/
public static CharSource concat(Iterator extends CharSource> sources) {
return concat(ImmutableList.copyOf(sources));
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
*
Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @throws NullPointerException if any of {@code sources} is {@code null}
* @since 15.0
*/
public static CharSource concat(CharSource... sources) {
return concat(ImmutableList.copyOf(sources));
}
/**
* Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
* returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
* the {@code charSequence} is mutated while it is being read, so don't do that.
*
* @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
*/
public static CharSource wrap(CharSequence charSequence) {
return charSequence instanceof String
? new StringCharSource((String) charSequence)
: new CharSequenceCharSource(charSequence);
}
/**
* Returns an immutable {@link CharSource} that contains no characters.
*
* @since 15.0
*/
public static CharSource empty() {
return EmptyCharSource.INSTANCE;
}
/** A byte source that reads chars from this source and encodes them as bytes using a charset. */
private final class AsByteSource extends ByteSource {
final Charset charset;
AsByteSource(Charset charset) {
this.charset = checkNotNull(charset);
}
@Override
public CharSource asCharSource(Charset charset) {
if (charset.equals(this.charset)) {
return CharSource.this;
}
return super.asCharSource(charset);
}
@Override
public InputStream openStream() throws IOException {
return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
}
@Override
public String toString() {
return CharSource.this.toString() + ".asByteSource(" + charset + ")";
}
}
private static class CharSequenceCharSource extends CharSource {
private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
protected final CharSequence seq;
protected CharSequenceCharSource(CharSequence seq) {
this.seq = checkNotNull(seq);
}
@Override
public Reader openStream() {
return new CharSequenceReader(seq);
}
@Override
public String read() {
return seq.toString();
}
@Override
public boolean isEmpty() {
return seq.length() == 0;
}
@Override
public long length() {
return seq.length();
}
@Override
public Optional lengthIfKnown() {
return Optional.of((long) seq.length());
}
/**
* Returns an iterator over the lines in the string. If the string ends in a newline, a final
* empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
*/
private Iterator linesIterator() {
return new AbstractIterator() {
Iterator lines = LINE_SPLITTER.split(seq).iterator();
@Override
protected String computeNext() {
if (lines.hasNext()) {
String next = lines.next();
// skip last line if it's empty
if (lines.hasNext() || !next.isEmpty()) {
return next;
}
}
return endOfData();
}
};
}
@Override
public Stream lines() {
return Streams.stream(linesIterator());
}
@Override
public String readFirstLine() {
Iterator lines = linesIterator();
return lines.hasNext() ? lines.next() : null;
}
@Override
public ImmutableList readLines() {
return ImmutableList.copyOf(linesIterator());
}
@Override
public T readLines(LineProcessor processor) throws IOException {
Iterator lines = linesIterator();
while (lines.hasNext()) {
if (!processor.processLine(lines.next())) {
break;
}
}
return processor.getResult();
}
@Override
public String toString() {
return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
}
}
/**
* Subclass specialized for string instances.
*
* Since Strings are immutable and built into the jdk we can optimize some operations
*
*
* - use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
* use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
* one with {@link CharSequence#charAt(int)}.
*
- use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
* #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
* can't change, and it is faster because many writers and appendables are optimized for
* appending string instances.
*
*/
private static class StringCharSource extends CharSequenceCharSource {
protected StringCharSource(String seq) {
super(seq);
}
@Override
public Reader openStream() {
return new StringReader((String) seq);
}
@Override
public long copyTo(Appendable appendable) throws IOException {
appendable.append(seq);
return seq.length();
}
@Override
public long copyTo(CharSink sink) throws IOException {
checkNotNull(sink);
Closer closer = Closer.create();
try {
Writer writer = closer.register(sink.openStream());
writer.write((String) seq);
return seq.length();
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
}
private static final class EmptyCharSource extends StringCharSource {
private static final EmptyCharSource INSTANCE = new EmptyCharSource();
private EmptyCharSource() {
super("");
}
@Override
public String toString() {
return "CharSource.empty()";
}
}
private static final class ConcatenatedCharSource extends CharSource {
private final Iterable extends CharSource> sources;
ConcatenatedCharSource(Iterable extends CharSource> sources) {
this.sources = checkNotNull(sources);
}
@Override
public Reader openStream() throws IOException {
return new MultiReader(sources.iterator());
}
@Override
public boolean isEmpty() throws IOException {
for (CharSource source : sources) {
if (!source.isEmpty()) {
return false;
}
}
return true;
}
@Override
public Optional lengthIfKnown() {
long result = 0L;
for (CharSource source : sources) {
Optional lengthIfKnown = source.lengthIfKnown();
if (!lengthIfKnown.isPresent()) {
return Optional.absent();
}
result += lengthIfKnown.get();
}
return Optional.of(result);
}
@Override
public long length() throws IOException {
long result = 0L;
for (CharSource source : sources) {
result += source.length();
}
return result;
}
@Override
public String toString() {
return "CharSource.concat(" + sources + ")";
}
}
}