All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.io.CharSource Maven / Gradle / Ivy

Go to download

This artifact provides a single JAR that contains all classes required to use remote EJB and JMS, including all dependencies. It is intended for use by those not using Maven; Maven users should just import the EJB and JMS BOMs instead (shaded JARs cause lots of problems with Maven, as it is very easy to inadvertently end up with different versions of classes on the class path).

There is a newer version: 34.0.0.Final
Show newest version
/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import com.google.errorprone.annotations.MustBeClosed;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;
import javax.annotation.CheckForNull;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
 * it is an immutable supplier of {@code Reader} instances.
 *
 * 

{@code CharSource} provides two kinds of methods: * *

    *
  • Methods that return a reader: These methods should return a new, independent * instance each time they are called. The caller is responsible for ensuring that the * returned reader is closed. *
  • Convenience methods: These are implementations of common operations that are * typically implemented by opening a reader using one of the methods in the first category, * doing something and finally closing the reader that was opened. *
* *

Several methods in this class, such as {@link #readLines()}, break the contents of the source * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to * be an empty line at the end if the contents are terminated with a line separator. * *

Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. * *

Note: In general, {@code CharSource} is intended to be used for "file-like" sources * that provide readers that are: * *

    *
  • Finite: Many operations, such as {@link #length()} and {@link #read()}, will either * block indefinitely or fail if the source creates an infinite reader. *
  • Non-destructive: A destructive reader will consume or otherwise alter the * source as they are read from it. A source that provides such readers will not be reusable, * and operations that read from the stream (including {@link #length()}, in some * implementations) will prevent further operations from completing as expected. *
* * @since 14.0 * @author Colin Decker */ @GwtIncompatible @ElementTypesAreNonnullByDefault public abstract class CharSource { /** Constructor for use by subclasses. */ protected CharSource() {} /** * Returns a {@link ByteSource} view of this char source that encodes chars read from this source * as bytes using the given {@link Charset}. * *

If {@link ByteSource#asCharSource} is called on the returned source with the same charset, * the default implementation of this method will ensure that the original {@code CharSource} is * returned, rather than round-trip encoding. Subclasses that override this method should behave * the same way. * * @since 20.0 */ @Beta public ByteSource asByteSource(Charset charset) { return new AsByteSource(charset); } /** * Opens a new {@link Reader} for reading from this source. This method returns a new, independent * reader each time it is called. * *

The caller is responsible for ensuring that the returned reader is closed. * * @throws IOException if an I/O error occurs while opening the reader */ public abstract Reader openStream() throws IOException; /** * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, * independent reader each time it is called. * *

The caller is responsible for ensuring that the returned reader is closed. * * @throws IOException if an I/O error occurs while of opening the reader */ public BufferedReader openBufferedStream() throws IOException { Reader reader = openStream(); return (reader instanceof BufferedReader) ? (BufferedReader) reader : new BufferedReader(reader); } /** * Opens a new {@link Stream} for reading text one line at a time from this source. This method * returns a new, independent stream each time it is called. * *

The returned stream is lazy and only reads from the source in the terminal operation. If an * I/O error occurs while the stream is reading from the source or when the stream is closed, an * {@link UncheckedIOException} is thrown. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * *

The caller is responsible for ensuring that the returned stream is closed. For example: * *

{@code
   * try (Stream lines = source.lines()) {
   *   lines.map(...)
   *      .filter(...)
   *      .forEach(...);
   * }
   * }
* * @throws IOException if an I/O error occurs while opening the stream * @since 22.0 */ @Beta @MustBeClosed public Stream lines() throws IOException { BufferedReader reader = openBufferedStream(); return reader .lines() .onClose( () -> { try { reader.close(); } catch (IOException e) { throw new UncheckedIOException(e); } }); } /** * Returns the size of this source in chars, if the size can be easily determined without actually * opening the data stream. * *

The default implementation returns {@link Optional#absent}. Some sources, such as a {@code * CharSequence}, may return a non-absent value. Note that in such cases, it is possible * that this method will return a different number of chars than would be returned by reading all * of the chars. * *

Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may * return a different number of chars if the contents are changed. * * @since 19.0 */ @Beta public Optional lengthIfKnown() { return Optional.absent(); } /** * Returns the length of this source in chars, even if doing so requires opening and traversing an * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. * *

The default implementation calls {@link #lengthIfKnown} and returns the value if present. If * absent, it will fall back to a heavyweight operation that will open a stream, {@link * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that * were skipped. * *

Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient * implementation, it is possible that this method will return a different number of chars * than would be returned by reading all of the chars. * *

In either case, for mutable sources such as files, a subsequent read may return a different * number of chars if the contents are changed. * * @throws IOException if an I/O error occurs while reading the length of this source * @since 19.0 */ @Beta public long length() throws IOException { Optional lengthIfKnown = lengthIfKnown(); if (lengthIfKnown.isPresent()) { return lengthIfKnown.get(); } Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return countBySkipping(reader); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } private long countBySkipping(Reader reader) throws IOException { long count = 0; long read; while ((read = reader.skip(Long.MAX_VALUE)) != 0) { count += read; } return count; } /** * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). * Does not close {@code appendable} if it is {@code Closeable}. * * @return the number of characters copied * @throws IOException if an I/O error occurs while reading from this source or writing to {@code * appendable} */ @CanIgnoreReturnValue public long copyTo(Appendable appendable) throws IOException { checkNotNull(appendable); Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return CharStreams.copy(reader, appendable); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Copies the contents of this source to the given sink. 
* * @return the number of characters copied * @throws IOException if an I/O error occurs while reading from this source or writing to {@code * sink} */ @CanIgnoreReturnValue public long copyTo(CharSink sink) throws IOException { checkNotNull(sink); Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); Writer writer = closer.register(sink.openStream()); return CharStreams.copy(reader, writer); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads the contents of this source as a string. * * @throws IOException if an I/O error occurs while reading from this source */ public String read() throws IOException { Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return CharStreams.toString(reader); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads the first line of this source as a string. Returns {@code null} if this source is empty. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source */ @CheckForNull public String readFirstLine() throws IOException { Closer closer = Closer.create(); try { BufferedReader reader = closer.register(openBufferedStream()); return reader.readLine(); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads all the lines of this source as a list of strings. The returned list will be empty if * this source is empty. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source */ public ImmutableList readLines() throws IOException { Closer closer = Closer.create(); try { BufferedReader reader = closer.register(openBufferedStream()); List result = Lists.newArrayList(); String line; while ((line = reader.readLine()) != null) { result.add(line); } return ImmutableList.copyOf(result); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads lines of text from this source, processing each line as it is read using the given {@link * LineProcessor processor}. Stops when all lines have been processed or the processor returns * {@code false} and returns the result produced by the processor. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source or if {@code * processor} throws an {@code IOException} * @since 16.0 */ @Beta @CanIgnoreReturnValue // some processors won't return a useful result @ParametricNullness public T readLines(LineProcessor processor) throws IOException { checkNotNull(processor); Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return CharStreams.readLines(reader, processor); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Reads all lines of text from this source, running the given {@code action} for each line as it * is read. * *

Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code * \n}. If the source's content does not end in a line termination sequence, it is treated as if * it does. * * @throws IOException if an I/O error occurs while reading from this source or if {@code action} * throws an {@code UncheckedIOException} * @since 22.0 */ @Beta public void forEachLine(Consumer action) throws IOException { try (Stream lines = lines()) { // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure lines.forEachOrdered(action); } catch (UncheckedIOException e) { throw e.getCause(); } } /** * Returns whether the source has zero chars. The default implementation first checks {@link * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. * *

Note that, in cases where {@code lengthIfKnown} returns zero, it is possible that * chars are actually available for reading. This means that a source may return {@code true} from * {@code isEmpty()} despite having readable content. * * @throws IOException if an I/O error occurs * @since 15.0 */ public boolean isEmpty() throws IOException { Optional lengthIfKnown = lengthIfKnown(); if (lengthIfKnown.isPresent()) { return lengthIfKnown.get() == 0L; } Closer closer = Closer.create(); try { Reader reader = closer.register(openStream()); return reader.read() == -1; } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } /** * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * * @param sources the sources to concatenate * @return a {@code CharSource} containing the concatenated data * @since 15.0 */ public static CharSource concat(Iterable sources) { return new ConcatenatedCharSource(sources); } /** * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * *

Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method * is called. This will fail if the iterator is infinite and may cause problems if the iterator * eagerly fetches data for each source when iterated (rather than producing sources that only * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if * possible. * * @param sources the sources to concatenate * @return a {@code CharSource} containing the concatenated data * @throws NullPointerException if any of {@code sources} is {@code null} * @since 15.0 */ public static CharSource concat(Iterator sources) { return concat(ImmutableList.copyOf(sources)); } /** * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from * the source will contain the concatenated data from the streams of the underlying sources. * *

Only one underlying stream will be open at a time. Closing the concatenated stream will * close the open underlying stream. * * @param sources the sources to concatenate * @return a {@code CharSource} containing the concatenated data * @throws NullPointerException if any of {@code sources} is {@code null} * @since 15.0 */ public static CharSource concat(CharSource... sources) { return concat(ImmutableList.copyOf(sources)); } /** * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if * the {@code charSequence} is mutated while it is being read, so don't do that. * * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) */ public static CharSource wrap(CharSequence charSequence) { return charSequence instanceof String ? new StringCharSource((String) charSequence) : new CharSequenceCharSource(charSequence); } /** * Returns an immutable {@link CharSource} that contains no characters. * * @since 15.0 */ public static CharSource empty() { return EmptyCharSource.INSTANCE; } /** A byte source that reads chars from this source and encodes them as bytes using a charset. 
*/ private final class AsByteSource extends ByteSource { final Charset charset; AsByteSource(Charset charset) { this.charset = checkNotNull(charset); } @Override public CharSource asCharSource(Charset charset) { if (charset.equals(this.charset)) { return CharSource.this; } return super.asCharSource(charset); } @Override public InputStream openStream() throws IOException { return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); } @Override public String toString() { return CharSource.this.toString() + ".asByteSource(" + charset + ")"; } } private static class CharSequenceCharSource extends CharSource { private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); protected final CharSequence seq; protected CharSequenceCharSource(CharSequence seq) { this.seq = checkNotNull(seq); } @Override public Reader openStream() { return new CharSequenceReader(seq); } @Override public String read() { return seq.toString(); } @Override public boolean isEmpty() { return seq.length() == 0; } @Override public long length() { return seq.length(); } @Override public Optional lengthIfKnown() { return Optional.of((long) seq.length()); } /** * Returns an iterator over the lines in the string. If the string ends in a newline, a final * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). */ private Iterator linesIterator() { return new AbstractIterator() { Iterator lines = LINE_SPLITTER.split(seq).iterator(); @Override @CheckForNull protected String computeNext() { if (lines.hasNext()) { String next = lines.next(); // skip last line if it's empty if (lines.hasNext() || !next.isEmpty()) { return next; } } return endOfData(); } }; } @Override public Stream lines() { return Streams.stream(linesIterator()); } @Override @CheckForNull public String readFirstLine() { Iterator lines = linesIterator(); return lines.hasNext() ? 
lines.next() : null; } @Override public ImmutableList readLines() { return ImmutableList.copyOf(linesIterator()); } @Override @ParametricNullness public T readLines(LineProcessor processor) throws IOException { Iterator lines = linesIterator(); while (lines.hasNext()) { if (!processor.processLine(lines.next())) { break; } } return processor.getResult(); } @Override public String toString() { return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; } } /** * Subclass specialized for string instances. * *

Since Strings are immutable and built into the jdk we can optimize some operations * *

    *
  • use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by * one with {@link CharSequence#charAt(int)}. *
  • use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length * can't change, and it is faster because many writers and appendables are optimized for * appending string instances. *
*/ private static class StringCharSource extends CharSequenceCharSource { protected StringCharSource(String seq) { super(seq); } @Override public Reader openStream() { return new StringReader((String) seq); } @Override public long copyTo(Appendable appendable) throws IOException { appendable.append(seq); return seq.length(); } @Override public long copyTo(CharSink sink) throws IOException { checkNotNull(sink); Closer closer = Closer.create(); try { Writer writer = closer.register(sink.openStream()); writer.write((String) seq); return seq.length(); } catch (Throwable e) { throw closer.rethrow(e); } finally { closer.close(); } } } private static final class EmptyCharSource extends StringCharSource { private static final EmptyCharSource INSTANCE = new EmptyCharSource(); private EmptyCharSource() { super(""); } @Override public String toString() { return "CharSource.empty()"; } } private static final class ConcatenatedCharSource extends CharSource { private final Iterable sources; ConcatenatedCharSource(Iterable sources) { this.sources = checkNotNull(sources); } @Override public Reader openStream() throws IOException { return new MultiReader(sources.iterator()); } @Override public boolean isEmpty() throws IOException { for (CharSource source : sources) { if (!source.isEmpty()) { return false; } } return true; } @Override public Optional lengthIfKnown() { long result = 0L; for (CharSource source : sources) { Optional lengthIfKnown = source.lengthIfKnown(); if (!lengthIfKnown.isPresent()) { return Optional.absent(); } result += lengthIfKnown.get(); } return Optional.of(result); } @Override public long length() throws IOException { long result = 0L; for (CharSource source : sources) { result += source.length(); } return result; } @Override public String toString() { return "CharSource.concat(" + sources + ")"; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy