com.google.common.io.CharSource Maven / Gradle / Ivy
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.io;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import javax.annotation.Nullable;
/**
* A readable source of characters, such as a text file. Unlike a {@link Reader}, a
* {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
* Instead, it is an immutable supplier of {@code Reader} instances.
*
* {@code CharSource} provides two kinds of methods:
*
* - Methods that return a reader: These methods should return a new, independent
* instance each time they are called. The caller is responsible for ensuring that the returned
* reader is closed.
*
- Convenience methods: These are implementations of common operations that are typically
* implemented by opening a reader using one of the methods in the first category, doing
* something and finally closing the reader that was opened.
*
*
* Several methods in this class, such as {@link #readLines()}, break the contents of the source
* into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
* {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
* there to be an empty line at the end if the contents are terminated with a line separator.
*
*
Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
* encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
*
* @since 14.0
* @author Colin Decker
*/
@GwtIncompatible
public abstract class CharSource {
/**
* Constructor for use by subclasses.
*/
protected CharSource() {}
/**
* Returns a {@link ByteSource} view of this char source that encodes chars read from this source
* as bytes using the given {@link Charset}.
*
*
If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
* the default implementation of this method will ensure that the original {@code CharSource} is
* returned, rather than round-trip encoding. Subclasses that override this method should behave
* the same way.
*
* @since 20.0
*/
@Beta
public ByteSource asByteSource(Charset charset) {
return new AsByteSource(charset);
}
/**
* Opens a new {@link Reader} for reading from this source. This method should return a new,
* independent reader each time it is called.
*
*
The caller is responsible for ensuring that the returned reader is closed.
*
* @throws IOException if an I/O error occurs in the process of opening the reader
*/
public abstract Reader openStream() throws IOException;
/**
* Opens a new {@link BufferedReader} for reading from this source. This method should return a
* new, independent reader each time it is called.
*
*
The caller is responsible for ensuring that the returned reader is closed.
*
* @throws IOException if an I/O error occurs in the process of opening the reader
*/
public BufferedReader openBufferedStream() throws IOException {
Reader reader = openStream();
return (reader instanceof BufferedReader)
? (BufferedReader) reader
: new BufferedReader(reader);
}
/**
* Returns the size of this source in chars, if the size can be easily determined without actually
* opening the data stream.
*
*
The default implementation returns {@link Optional#absent}. Some sources, such as a
* {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
* possible that this method will return a different number of chars than would be returned
* by reading all of the chars.
*
*
Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
* return a different number of chars if the contents are changed.
*
* @since 19.0
*/
@Beta
public Optional lengthIfKnown() {
return Optional.absent();
}
/**
* Returns the length of this source in chars, even if doing so requires opening and traversing an
* entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
*
* The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
* absent, it will fall back to a heavyweight operation that will open a stream,
* {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
* that were skipped.
*
*
Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
* implementation, it is possible that this method will return a different number of chars
* than would be returned by reading all of the chars.
*
*
In either case, for mutable sources such as files, a subsequent read may return a different
* number of chars if the contents are changed.
*
* @throws IOException if an I/O error occurs in the process of reading the length of this source
* @since 19.0
*/
@Beta
public long length() throws IOException {
Optional lengthIfKnown = lengthIfKnown();
if (lengthIfKnown.isPresent()) {
return lengthIfKnown.get();
}
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return countBySkipping(reader);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
private long countBySkipping(Reader reader) throws IOException {
long count = 0;
long read;
while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
count += read;
}
return count;
}
/**
* Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
* Does not close {@code appendable} if it is {@code Closeable}.
*
* @return the number of characters copied
* @throws IOException if an I/O error occurs in the process of reading from this source or
* writing to {@code appendable}
*/
@CanIgnoreReturnValue
public long copyTo(Appendable appendable) throws IOException {
checkNotNull(appendable);
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return CharStreams.copy(reader, appendable);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Copies the contents of this source to the given sink.
*
* @return the number of characters copied
* @throws IOException if an I/O error occurs in the process of reading from this source or
* writing to {@code sink}
*/
@CanIgnoreReturnValue
public long copyTo(CharSink sink) throws IOException {
checkNotNull(sink);
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
Writer writer = closer.register(sink.openStream());
return CharStreams.copy(reader, writer);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads the contents of this source as a string.
*
* @throws IOException if an I/O error occurs in the process of reading from this source
*/
public String read() throws IOException {
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return CharStreams.toString(reader);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads the first line of this source as a string. Returns {@code null} if this source is empty.
*
* Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
* {@code \r\n}, does not include the line separator in the returned line and does not consider
* there to be an extra empty line at the end if the content is terminated with a line separator.
*
* @throws IOException if an I/O error occurs in the process of reading from this source
*/
@Nullable
public String readFirstLine() throws IOException {
Closer closer = Closer.create();
try {
BufferedReader reader = closer.register(openBufferedStream());
return reader.readLine();
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads all the lines of this source as a list of strings. The returned list will be empty if
* this source is empty.
*
*
Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
* {@code \r\n}, does not include the line separator in the returned lines and does not consider
* there to be an extra empty line at the end if the content is terminated with a line separator.
*
* @throws IOException if an I/O error occurs in the process of reading from this source
*/
public ImmutableList readLines() throws IOException {
Closer closer = Closer.create();
try {
BufferedReader reader = closer.register(openBufferedStream());
List result = Lists.newArrayList();
String line;
while ((line = reader.readLine()) != null) {
result.add(line);
}
return ImmutableList.copyOf(result);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Reads lines of text from this source, processing each line as it is read using the given
* {@link LineProcessor processor}. Stops when all lines have been processed or the processor
* returns {@code false} and returns the result produced by the processor.
*
* Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
* {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
* and does not consider there to be an extra empty line at the end if the content is terminated
* with a line separator.
*
* @throws IOException if an I/O error occurs in the process of reading from this source or if
* {@code processor} throws an {@code IOException}
* @since 16.0
*/
@Beta
@CanIgnoreReturnValue // some processors won't return a useful result
public T readLines(LineProcessor processor) throws IOException {
checkNotNull(processor);
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return CharStreams.readLines(reader, processor);
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Returns whether the source has zero chars. The default implementation returns true if
* {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking for EOF if
* the length is not known.
*
* Note that, in cases where {@code lengthIfKnown} returns zero, it is possible that
* chars are actually available for reading. This means that a source may return {@code true} from
* {@code isEmpty()} despite having readable content.
*
* @throws IOException if an I/O error occurs
* @since 15.0
*/
public boolean isEmpty() throws IOException {
Optional lengthIfKnown = lengthIfKnown();
if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) {
return true;
}
Closer closer = Closer.create();
try {
Reader reader = closer.register(openStream());
return reader.read() == -1;
} catch (Throwable e) {
throw closer.rethrow(e);
} finally {
closer.close();
}
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
* Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @since 15.0
*/
public static CharSource concat(Iterable extends CharSource> sources) {
return new ConcatenatedCharSource(sources);
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
*
Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
*
Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
* is called. This will fail if the iterator is infinite and may cause problems if the iterator
* eagerly fetches data for each source when iterated (rather than producing sources that only
* load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
* possible.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @throws NullPointerException if any of {@code sources} is {@code null}
* @since 15.0
*/
public static CharSource concat(Iterator extends CharSource> sources) {
return concat(ImmutableList.copyOf(sources));
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
*
Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @throws NullPointerException if any of {@code sources} is {@code null}
* @since 15.0
*/
public static CharSource concat(CharSource... sources) {
return concat(ImmutableList.copyOf(sources));
}
/**
* Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
* returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
* the {@code charSequence} is mutated while it is being read, so don't do that.
*
* @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
*/
public static CharSource wrap(CharSequence charSequence) {
return new CharSequenceCharSource(charSequence);
}
/**
* Returns an immutable {@link CharSource} that contains no characters.
*
* @since 15.0
*/
public static CharSource empty() {
return EmptyCharSource.INSTANCE;
}
/**
* A byte source that reads chars from this source and encodes them as bytes using a charset.
*/
private final class AsByteSource extends ByteSource {
final Charset charset;
AsByteSource(Charset charset) {
this.charset = checkNotNull(charset);
}
@Override
public CharSource asCharSource(Charset charset) {
if (charset.equals(this.charset)) {
return CharSource.this;
}
return super.asCharSource(charset);
}
@Override
public InputStream openStream() throws IOException {
return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
}
@Override
public String toString() {
return CharSource.this.toString() + ".asByteSource(" + charset + ")";
}
}
private static class CharSequenceCharSource extends CharSource {
private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
private final CharSequence seq;
protected CharSequenceCharSource(CharSequence seq) {
this.seq = checkNotNull(seq);
}
@Override
public Reader openStream() {
return new CharSequenceReader(seq);
}
@Override
public String read() {
return seq.toString();
}
@Override
public boolean isEmpty() {
return seq.length() == 0;
}
@Override
public long length() {
return seq.length();
}
@Override
public Optional lengthIfKnown() {
return Optional.of((long) seq.length());
}
/**
* Returns an iterable over the lines in the string. If the string ends in a newline, a final
* empty string is not included to match the behavior of BufferedReader/LineReader.readLine().
*/
private Iterable lines() {
return new Iterable() {
@Override
public Iterator iterator() {
return new AbstractIterator() {
Iterator lines = LINE_SPLITTER.split(seq).iterator();
@Override
protected String computeNext() {
if (lines.hasNext()) {
String next = lines.next();
// skip last line if it's empty
if (lines.hasNext() || !next.isEmpty()) {
return next;
}
}
return endOfData();
}
};
}
};
}
@Override
public String readFirstLine() {
Iterator lines = lines().iterator();
return lines.hasNext() ? lines.next() : null;
}
@Override
public ImmutableList readLines() {
return ImmutableList.copyOf(lines());
}
@Override
public T readLines(LineProcessor processor) throws IOException {
for (String line : lines()) {
if (!processor.processLine(line)) {
break;
}
}
return processor.getResult();
}
@Override
public String toString() {
return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
}
}
private static final class EmptyCharSource extends CharSequenceCharSource {
private static final EmptyCharSource INSTANCE = new EmptyCharSource();
private EmptyCharSource() {
super("");
}
@Override
public String toString() {
return "CharSource.empty()";
}
}
private static final class ConcatenatedCharSource extends CharSource {
private final Iterable extends CharSource> sources;
ConcatenatedCharSource(Iterable extends CharSource> sources) {
this.sources = checkNotNull(sources);
}
@Override
public Reader openStream() throws IOException {
return new MultiReader(sources.iterator());
}
@Override
public boolean isEmpty() throws IOException {
for (CharSource source : sources) {
if (!source.isEmpty()) {
return false;
}
}
return true;
}
@Override
public Optional lengthIfKnown() {
long result = 0L;
for (CharSource source : sources) {
Optional lengthIfKnown = source.lengthIfKnown();
if (!lengthIfKnown.isPresent()) {
return Optional.absent();
}
result += lengthIfKnown.get();
}
return Optional.of(result);
}
@Override
public long length() throws IOException {
long result = 0L;
for (CharSource source : sources) {
result += source.length();
}
return result;
}
@Override
public String toString() {
return "CharSource.concat(" + sources + ")";
}
}
}