All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.io.CharSource Maven / Gradle / Ivy

There is a newer version: 30.1
Show newest version
/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;


/**
 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
 * it is an immutable supplier of {@code Reader} instances.
 *
 * 

{@code CharSource} provides two kinds of methods: * *

    *
  • Methods that return a reader: These methods should return a new, independent * instance each time they are called. The caller is responsible for ensuring that the * returned reader is closed. *
  • Convenience methods: These are implementations of common operations that are * typically implemented by opening a reader using one of the methods in the first category, * doing something and finally closing the reader that was opened. *
* *

Several methods in this class, such as {@link #readLines()}, break the contents of the source * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to * be an empty line at the end if the contents are terminated with a line separator. * *

Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. * * @since 14.0 * @author Colin Decker */ @GwtIncompatible public abstract class CharSource { /** Constructor for use by subclasses. */ protected CharSource() {} /** * Returns a {@link ByteSource} view of this char source that encodes chars read from this source * as bytes using the given {@link Charset}. * *

If {@link ByteSource#asCharSource} is called on the returned source with the same charset, * the default implementation of this method will ensure that the original {@code CharSource} is * returned, rather than round-trip encoding. Subclasses that override this method should behave * the same way. * * @since 20.0 */ @Beta public ByteSource asByteSource(Charset charset) { return new AsByteSource(charset); } /** * Opens a new {@link Reader} for reading from this source. This method returns a new, independent * reader each time it is called. * *

The caller is responsible for ensuring that the returned reader is closed. * * @throws IOException if an I/O error occurs while opening the reader */ public abstract Reader openStream() throws IOException; /** * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, * independent reader each time it is called. * *

The caller is responsible for ensuring that the returned reader is closed. * * @throws IOException if an I/O error occurs while of opening the reader */ public BufferedReader openBufferedStream() throws IOException { Reader reader = openStream(); return (reader instanceof BufferedReader) ? (BufferedReader) reader : new BufferedReader(reader); } /** * Opens a new {@link Stream} for reading text one line at a time from this source. This method * returns a new, independent stream each time it is called. * *

The returned stream is lazy and only reads from the source in the terminal operation. If an * I/O error occurs while the stream is reading from the source or when the stream is closed, an * {@link UncheckedIOException} is thrown. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * *

The caller is responsible for ensuring that the returned stream is closed. For example: * *

{@code
   * try (Stream lines = source.lines()) {
   *   lines.map(...)
   *      .filter(...)
   *      .forEach(...);
   * }
   * }
* * @throws IOException if an I/O error occurs while opening the stream * @since 22.0 */ @Beta public Stream lines() throws IOException { BufferedReader reader = openBufferedStream(); return reader .lines() .onClose( () -> { try { reader.close(); } catch (IOException e) { throw new UncheckedIOException(e); } }); } /** * Returns the size of this source in chars, if the size can be easily determined without actually * opening the data stream. * *

The default implementation returns {@link Optional#absent}. Some sources, such as a {@code * CharSequence}, may return a non-absent value. Note that in such cases, it is possible * that this method will return a different number of chars than would be returned by reading all * of the chars. * *

Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may * return a different number of chars if the contents are changed. * * @since 19.0 */ @Beta public Optional lengthIfKnown() { return Optional.absent(); } /** * Returns the length of this source in chars, even if doing so requires opening and traversing an * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. * *

The default implementation calls {@link #lengthIfKnown} and returns the value if present. If * absent, it will fall back to a heavyweight operation that will open a stream, {@link * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that * were skipped. * *

Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient * implementation, it is possible that this method will return a different number of chars * than would be returned by reading all of the chars. * *

In either case, for mutable sources such as files, a subsequent read may return a different * number of chars if the contents are changed. * * @throws IOException if an I/O error occurs while reading the length of this source * @since 19.0 */ @Beta public long length() throws IOException { Optional lengthIfKnown = lengthIfKnown(); if (lengthIfKnown.isPresent()) { return lengthIfKnown.get(); } Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return countBySkipping(reader); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } private long countBySkipping(Reader reader) throws IOException { long count = 0; long read; while ((read = reader.skip(Long.MAX_VALUE)) != 0) { count += read; } return count; } /** * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). * Does not close {@code appendable} if it is {@code Closeable}. * * @return the number of characters copied * @throws IOException if an I/O error occurs while reading from this source or writing to {@code * appendable} */ public long copyTo(Appendable appendable) throws IOException { checkNotNull(appendable); Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return CharStreams.copy(reader, appendable); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Copies the contents of this source to the given sink. * * @return the number of characters copied * @throws IOException if an I/O error occurs while reading from this source or writing to {@code * sink} */ public long copyTo(CharSink sink) throws IOException { checkNotNull(sink); Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); Writer writer = closer.register(sink.openStream()); return CharStreams.copy(reader, writer); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads the contents of this source as a string. * * @throws IOException if an I/O error occurs while reading from this source */ public String read() throws IOException { Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return CharStreams.toString(reader); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads the first line of this source as a string. Returns {@code null} if this source is empty. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source */ public String readFirstLine() throws IOException { Closer closer = Closer.create(); try { BufferedReader reader = closer.register(openBufferedStream()); return reader.readLine(); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads all the lines of this source as a list of strings. The returned list will be empty if * this source is empty. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source */ public ImmutableList readLines() throws IOException { Closer closer = Closer.create(); try { BufferedReader reader = closer.register(openBufferedStream()); List result = Lists.newArrayList(); String line; while ((line = reader.readLine()) != null) { result.add(line); } return ImmutableList.copyOf(result); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads lines of text from this source, processing each line as it is read using the given {@link * LineProcessor processor}. Stops when all lines have been processed or the processor returns * {@code false} and returns the result produced by the processor. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source or if {@code * processor} throws an {@code IOException} * @since 16.0 */ @Beta // some processors won't return a useful result public T readLines(LineProcessor processor) throws IOException { checkNotNull(processor); Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return CharStreams.readLines(reader, processor); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads all lines of text from this source, running the given {@code action} for each line as it * is read. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source or if {@code action} * throws an {@code UncheckedIOException} * @since 22.0 */ @Beta public void forEachLine(Consumer action) throws IOException { try (Stream lines = lines()) { // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure lines.forEachOrdered(action); } catch (UncheckedIOException e) { throw e.getCause(); } } /** * Returns whether the source has zero chars. The default implementation first checks {@link * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. * *

Note that, in cases where {@code lengthIfKnown} returns zero, it is possible that * chars are actually available for reading. This means that a source may return {@code true} from * {@code isEmpty()} despite having readable content. * * @throws IOException if an I/O error occurs * @since 15.0 */ public boolean isEmpty() throws IOException { Optional lengthIfKnown = lengthIfKnown(); if (lengthIfKnown.isPresent()) { return lengthIfKnown.get() == 0L; } Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return reader.read() == -1; } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * * @param sources the sources to concatenate * @return a {@code CharSource} containing the concatenated data * @since 15.0 */ public static CharSource concat(Iterable sources) { return new ConcatenatedCharSource(sources); } /** * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * *

Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method * is called. This will fail if the iterator is infinite and may cause problems if the iterator * eagerly fetches data for each source when iterated (rather than producing sources that only * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if * possible. * * @param sources the sources to concatenate * @return a {@code CharSource} containing the concatenated data * @throws NullPointerException if any of {@code sources} is {@code null} * @since 15.0 */ public static CharSource concat(Iterator sources) { return concat(ImmutableList.copyOf(sources)); } /** * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * * @param sources the sources to concatenate * @return a {@code CharSource} containing the concatenated data * @throws NullPointerException if any of {@code sources} is {@code null} * @since 15.0 */ public static CharSource concat(CharSource... sources) { return concat(ImmutableList.copyOf(sources)); } /** * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if * the {@code charSequence} is mutated while it is being read, so don't do that. * * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) */ public static CharSource wrap(CharSequence charSequence) { return charSequence instanceof String ? new StringCharSource((String) charSequence) : new CharSequenceCharSource(charSequence); } /** * Returns an immutable {@link CharSource} that contains no characters. * * @since 15.0 */ public static CharSource empty() { return EmptyCharSource.INSTANCE; } /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ private final class AsByteSource extends ByteSource { final Charset charset; AsByteSource(Charset charset) { this.charset = checkNotNull(charset); } @Override public CharSource asCharSource(Charset charset) { if (charset.equals(this.charset)) { return CharSource.this; } return super.asCharSource(charset); } @Override public InputStream openStream() throws IOException { return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); } @Override public String toString() { return CharSource.this.toString() + ".asByteSource(" + charset + ")"; } } private static class CharSequenceCharSource extends CharSource { private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); protected final CharSequence seq; protected CharSequenceCharSource(CharSequence seq) { this.seq = checkNotNull(seq); } @Override public Reader openStream() { return new CharSequenceReader(seq); } @Override public String read() { return seq.toString(); } @Override public boolean isEmpty() { return seq.length() == 0; } @Override public long length() { return seq.length(); } @Override public Optional lengthIfKnown() { return Optional.of((long) seq.length()); } /** * Returns an iterator over the lines in the string. If the string ends in a newline, a final * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). */ private Iterator linesIterator() { return new AbstractIterator() { Iterator lines = LINE_SPLITTER.split(seq).iterator(); @Override protected String computeNext() { if (lines.hasNext()) { String next = lines.next(); // skip last line if it's empty if (lines.hasNext() || !next.isEmpty()) { return next; } } return endOfData(); } }; } @Override public Stream lines() { return Streams.stream(linesIterator()); } @Override public String readFirstLine() { Iterator lines = linesIterator(); return lines.hasNext() ? lines.next() : null; } @Override public ImmutableList readLines() { return ImmutableList.copyOf(linesIterator()); } @Override public T readLines(LineProcessor processor) throws IOException { Iterator lines = linesIterator(); while (lines.hasNext()) { if (!processor.processLine(lines.next())) { break; } } return processor.getResult(); } @Override public String toString() { return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; } } /** * Subclass specialized for string instances. * *

Since Strings are immutable and built into the jdk we can optimize some operations * *

    *
  • use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by * one with {@link CharSequence#charAt(int)}. *
  • use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length * can't change, and it is faster because many writers and appendables are optimized for * appending string instances. *
*/ private static class StringCharSource extends CharSequenceCharSource { protected StringCharSource(String seq) { super(seq); } @Override public Reader openStream() { return new StringReader((String) seq); } @Override public long copyTo(Appendable appendable) throws IOException { appendable.append(seq); return seq.length(); } @Override public long copyTo(CharSink sink) throws IOException { checkNotNull(sink); Closer closer = Closer.create(); try { Writer writer = closer.register(sink.openStream()); writer.write((String) seq); return seq.length(); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } } private static final class EmptyCharSource extends StringCharSource { private static final EmptyCharSource INSTANCE = new EmptyCharSource(); private EmptyCharSource() { super(""); } @Override public String toString() { return "CharSource.empty()"; } } private static final class ConcatenatedCharSource extends CharSource { private final Iterable sources; ConcatenatedCharSource(Iterable sources) { this.sources = checkNotNull(sources); } @Override public Reader openStream() throws IOException { return new MultiReader(sources.iterator()); } @Override public boolean isEmpty() throws IOException { for (CharSource source : sources) { if (!source.isEmpty()) { return false; } } return true; } @Override public Optional lengthIfKnown() { long result = 0L; for (CharSource source : sources) { Optional lengthIfKnown = source.lengthIfKnown(); if (!lengthIfKnown.isPresent()) { return Optional.absent(); } result += lengthIfKnown.get(); } return Optional.of(result); } @Override public long length() throws IOException { long result = 0L; for (CharSource source : sources) { result += source.length(); } return result; } @Override public String toString() { return "CharSource.concat(" + sources + ")"; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy