// com.google.common.io.CharSource Maven / Gradle / Ivy
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.io;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.annotations.Beta;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
 * Instead, it is an immutable supplier of {@code Reader} instances.
 *
 * <p>{@code CharSource} provides two kinds of methods:
 * <ul>
 *   <li>Methods that return a reader: These methods should return a new, independent
 *   instance each time they are called. The caller is responsible for ensuring that the returned
 *   reader is closed.
 *   <li>Convenience methods: These are implementations of common operations that are
 *   typically implemented by opening a reader using one of the methods in the first category,
 *   doing something and finally closing the reader that was opened.
 * </ul>
 *
 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the
 * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
 * there to be an empty line at the end if the contents are terminated with a line separator.
 *
 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
 *
 * @since 14.0
 * @author Colin Decker
 */
public abstract class CharSource {
/**
 * Sole constructor, {@code protected} so that it is only invoked by subclasses.
 */
protected CharSource() {
}
/**
 * Opens a new {@link Reader} for reading from this source. This method should return a new,
 * independent reader each time it is called.
 *
 * <p>The caller is responsible for ensuring that the returned reader is closed.
 *
 * @return a newly opened reader over the characters of this source
 * @throws IOException if an I/O error occurs in the process of opening the reader
 */
public abstract Reader openStream() throws IOException;
/**
 * Opens a new {@link BufferedReader} for reading from this source. This method should return a
 * new, independent reader each time it is called.
 *
 * <p>The reader obtained from {@link #openStream()} is returned directly when it already is a
 * {@code BufferedReader}; otherwise it is wrapped in a new one.
 *
 * <p>The caller is responsible for ensuring that the returned reader is closed.
 *
 * @return a buffered reader over the characters of this source
 * @throws IOException if an I/O error occurs in the process of opening the reader
 */
public BufferedReader openBufferedStream() throws IOException {
  final Reader underlying = openStream();
  if (underlying instanceof BufferedReader) {
    // Already buffered: hand it back as-is rather than wrapping it a second time.
    return (BufferedReader) underlying;
  }
  return new BufferedReader(underlying);
}
/**
 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
 * Does not close {@code appendable} if it is {@code Closeable}.
 *
 * @param appendable the destination to append to; must not be {@code null}
 * @return the value reported by {@code CharStreams.copy} for the reader opened on this source
 * @throws IOException if an I/O error occurs in the process of reading from this source or
 *     writing to {@code appendable}
 */
public long copyTo(Appendable appendable) throws IOException {
  checkNotNull(appendable);

  final Closer resources = Closer.create();
  try {
    // Only the reader is registered; the appendable is deliberately left open for the caller.
    final Reader in = resources.register(openStream());
    final long copied = CharStreams.copy(in, appendable);
    return copied;
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
/**
 * Copies the contents of this source to the given sink.
 *
 * @param sink the sink to write to; must not be {@code null}
 * @return the value reported by {@code CharStreams.copy} for the opened reader and writer
 * @throws IOException if an I/O error occurs in the process of reading from this source or
 *     writing to {@code sink}
 */
public long copyTo(CharSink sink) throws IOException {
  checkNotNull(sink);

  final Closer resources = Closer.create();
  try {
    // The source is opened before the sink; both are registered so the finally block closes them.
    final Reader in = resources.register(openStream());
    final Writer out = resources.register(sink.openStream());
    final long copied = CharStreams.copy(in, out);
    return copied;
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
/**
 * Reads the contents of this source as a string.
 *
 * @return everything read from a freshly opened reader, as produced by
 *     {@code CharStreams.toString}
 * @throws IOException if an I/O error occurs in the process of reading from this source
 */
public String read() throws IOException {
  final Closer resources = Closer.create();
  try {
    final Reader in = resources.register(openStream());
    final String contents = CharStreams.toString(in);
    return contents;
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
/**
 * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
 *
 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
 * {@code \r\n}, does not include the line separator in the returned line and does not consider
 * there to be an extra empty line at the end if the content is terminated with a line separator.
 *
 * @return the first line, or {@code null} when the buffered reader reports end of input
 *     immediately
 * @throws IOException if an I/O error occurs in the process of reading from this source
 */
@Nullable
public String readFirstLine() throws IOException {
  final Closer resources = Closer.create();
  try {
    final BufferedReader in = resources.register(openBufferedStream());
    // BufferedReader.readLine() yields null at end of input, which is passed straight through.
    final String firstLine = in.readLine();
    return firstLine;
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
/**
 * Reads all the lines of this source as a list of strings. The returned list will be empty if
 * this source is empty.
 *
 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
 * {@code \r\n}, does not include the line separator in the returned lines and does not consider
 * there to be an extra empty line at the end if the content is terminated with a line separator.
 *
 * @return an immutable list holding every line, in the order read
 * @throws IOException if an I/O error occurs in the process of reading from this source
 */
public ImmutableList<String> readLines() throws IOException {
  Closer closer = Closer.create();
  try {
    BufferedReader reader = closer.register(openBufferedStream());
    // Element type is spelled out so neither the accumulator nor the result is a raw type.
    List<String> result = Lists.newArrayList();
    String line;
    while ((line = reader.readLine()) != null) {
      result.add(line);
    }
    return ImmutableList.copyOf(result);
  } catch (Throwable e) {
    throw closer.rethrow(e);
  } finally {
    closer.close();
  }
}
/**
 * Reads lines of text from this source, processing each line as it is read using the given
 * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
 * returns {@code false} and returns the result produced by the processor.
 *
 * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
 * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
 * and does not consider there to be an extra empty line at the end if the content is terminated
 * with a line separator.
 *
 * @param <T> the type of result produced by {@code processor}
 * @param processor the callback that receives each line; must not be {@code null}
 * @return the result produced by {@code processor}
 * @throws IOException if an I/O error occurs in the process of reading from this source or if
 *     {@code processor} throws an {@code IOException}
 * @since 16.0
 */
@Beta
public <T> T readLines(LineProcessor<T> processor) throws IOException {
  checkNotNull(processor);
  Closer closer = Closer.create();
  try {
    Reader reader = closer.register(openStream());
    return CharStreams.readLines(reader, processor);
  } catch (Throwable e) {
    throw closer.rethrow(e);
  } finally {
    closer.close();
  }
}
/**
 * Returns whether the source has zero chars. The default implementation is to open a stream and
 * check for EOF.
 *
 * @return {@code true} if the first {@code read()} on a freshly opened reader returns {@code -1}
 * @throws IOException if an I/O error occurs
 * @since 15.0
 */
public boolean isEmpty() throws IOException {
  final Closer resources = Closer.create();
  try {
    final Reader in = resources.register(openStream());
    // Reader.read() returns -1 at end of stream, so -1 on the first call means no chars at all.
    final int firstChar = in.read();
    return firstChar == -1;
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
* Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @since 15.0
*/
public static CharSource concat(Iterable extends CharSource> sources) {
return new ConcatenatedCharSource(sources);
}
/**
* Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
* the source will contain the concatenated data from the streams of the underlying sources.
*
*
Only one underlying stream will be open at a time. Closing the concatenated stream will
* close the open underlying stream.
*
*
Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
* method is called. This will fail if the iterator is infinite and may cause problems if the
* iterator eagerly fetches data for each source when iterated (rather than producing sources
* that only load data through their streams). Prefer using the {@link #concat(Iterable)}
* overload if possible.
*
* @param sources the sources to concatenate
* @return a {@code CharSource} containing the concatenated data
* @throws NullPointerException if any of {@code sources} is {@code null}
* @since 15.0
*/
public static CharSource concat(Iterator extends CharSource> sources) {
return concat(ImmutableList.copyOf(sources));
}
/**
 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
 * the source will contain the concatenated data from the streams of the underlying sources.
 *
 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
 * close the open underlying stream.
 *
 * <p>The given array is copied into an {@code ImmutableList} before being concatenated.
 *
 * @param sources the sources to concatenate
 * @return a {@code CharSource} containing the concatenated data
 * @throws NullPointerException if any of {@code sources} is {@code null}
 * @since 15.0
 */
public static CharSource concat(CharSource... sources) {
  final ImmutableList<CharSource> snapshot = ImmutableList.copyOf(sources);
  return concat(snapshot);
}
/**
 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
 * the {@code charSequence} is mutated while it is being read, so don't do that.
 *
 * @param charSequence the characters to expose; must not be {@code null}
 * @return a {@code CharSource} backed by {@code charSequence}
 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
 */
public static CharSource wrap(CharSequence charSequence) {
  final CharSource view = new CharSequenceCharSource(charSequence);
  return view;
}
/**
 * Returns an immutable {@link CharSource} that contains no characters.
 *
 * @return the shared {@code EmptyCharSource.INSTANCE}
 * @since 15.0
 */
public static CharSource empty() {
  final CharSource shared = EmptyCharSource.INSTANCE;
  return shared;
}
/**
 * A {@code CharSource} backed directly by an in-memory {@link CharSequence}. The read and
 * line-oriented operations are answered from the sequence itself rather than through a reader.
 */
private static class CharSequenceCharSource extends CharSource {

  /** Splits on any of the line separators {@code \r\n}, {@code \n} or {@code \r}. */
  private static final Splitter LINE_SPLITTER
      = Splitter.on(Pattern.compile("\r\n|\n|\r"));

  private final CharSequence seq;

  /**
   * Creates a source over {@code seq}.
   *
   * @throws NullPointerException if {@code seq} is {@code null}
   */
  protected CharSequenceCharSource(CharSequence seq) {
    this.seq = checkNotNull(seq);
  }

  @Override
  public Reader openStream() {
    return new CharSequenceReader(seq);
  }

  @Override
  public String read() {
    return seq.toString();
  }

  @Override
  public boolean isEmpty() {
    return seq.length() == 0;
  }

  /**
   * Returns an iterable over the lines in the string. If the string ends in
   * a newline, a final empty string is not included to match the behavior of
   * BufferedReader/LineReader.readLine().
   */
  private Iterable<String> lines() {
    return new Iterable<String>() {
      @Override
      public Iterator<String> iterator() {
        return new AbstractIterator<String>() {
          final Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();

          @Override
          protected String computeNext() {
            if (lines.hasNext()) {
              String next = lines.next();
              // skip last line if it's empty
              if (lines.hasNext() || !next.isEmpty()) {
                return next;
              }
            }
            return endOfData();
          }
        };
      }
    };
  }

  @Override
  public String readFirstLine() {
    Iterator<String> lines = lines().iterator();
    return lines.hasNext() ? lines.next() : null;
  }

  @Override
  public ImmutableList<String> readLines() {
    return ImmutableList.copyOf(lines());
  }

  /**
   * Feeds each line of the sequence to {@code processor} until the lines are exhausted or the
   * processor returns {@code false}, then returns the processor's result.
   */
  @Override
  public <T> T readLines(LineProcessor<T> processor) throws IOException {
    for (String line : lines()) {
      if (!processor.processLine(line)) {
        break;
      }
    }
    return processor.getResult();
  }

  @Override
  public String toString() {
    return "CharSource.wrap(" + truncate(seq, 30, "...") + ")";
  }

  /**
   * Truncates the given character sequence to the given maximum length. If the length of the
   * sequence is greater than {@code maxLength}, the returned string will be exactly
   * {@code maxLength} chars in length and will end with the given {@code truncationIndicator}.
   * Otherwise, the sequence will be returned as a string with no changes to the content.
   *
   * <p>Examples:
   *
   * <pre>   {@code
   *   truncate("foobar", 7, "..."); // returns "foobar"
   *   truncate("foobar", 5, "..."); // returns "fo..." }</pre>
   *
   * <p>Note: This method may work with certain non-ASCII text but is not safe for
   * use with arbitrary Unicode text. It is mostly intended for use with text that is known to be
   * safe for use with it (such as all-ASCII text) and for simple debugging text. When using this
   * method, consider the following:
   *
   * <ul>
   *   <li>it may split surrogate pairs
   *   <li>it may split characters and combining characters
   *   <li>it does not consider word boundaries
   *   <li>if truncating for display to users, there are other considerations that must be taken
   *   into account
   *   <li>the appropriate truncation indicator may be locale-dependent
   *   <li>it is safe to use non-ASCII characters in the truncation indicator
   * </ul>
   *
   * @throws IllegalArgumentException if {@code maxLength} is less than the length of
   *     {@code truncationIndicator}
   */
  /*
   * TODO(user, cpovirk): Use Ascii.truncate once it is available in our internal copy of
   * guava_jdk5.
   */
  private static String truncate(CharSequence seq, int maxLength, String truncationIndicator) {
    checkNotNull(seq);

    // length to truncate the sequence to, not including the truncation indicator
    int truncationLength = maxLength - truncationIndicator.length();

    // in this worst case, this allows a maxLength equal to the length of the truncationIndicator,
    // meaning that a string will be truncated to just the truncation indicator itself
    checkArgument(truncationLength >= 0,
        "maxLength (%s) must be >= length of the truncation indicator (%s)",
        maxLength, truncationIndicator.length());

    if (seq.length() <= maxLength) {
      String string = seq.toString();
      if (string.length() <= maxLength) {
        return string;
      }
      // if the length of the toString() result was > maxLength for some reason, truncate that
      seq = string;
    }

    return new StringBuilder(maxLength)
        .append(seq, 0, truncationLength)
        .append(truncationIndicator)
        .toString();
  }
}
/**
 * A {@code CharSequenceCharSource} over the empty string, with a {@code toString()} of its own.
 */
private static final class EmptyCharSource extends CharSequenceCharSource {

  /** The one instance of this class; the constructor is private. */
  private static final EmptyCharSource INSTANCE = new EmptyCharSource();

  private EmptyCharSource() {
    super("");
  }

  @Override
  public String toString() {
    final String description = "CharSource.empty()";
    return description;
  }
}
private static final class ConcatenatedCharSource extends CharSource {
private final Iterable extends CharSource> sources;
ConcatenatedCharSource(Iterable extends CharSource> sources) {
this.sources = checkNotNull(sources);
}
@Override
public Reader openStream() throws IOException {
return new MultiReader(sources.iterator());
}
@Override
public boolean isEmpty() throws IOException {
for (CharSource source : sources) {
if (!source.isEmpty()) {
return false;
}
}
return true;
}
@Override
public String toString() {
return "CharSource.concat(" + sources + ")";
}
}
}