// de.siegmar.fastcsv.reader.CsvReader - Maven / Gradle / Ivy
// (artifact page residue: "Show all versions of fastcsv", "Show documentation")
package de.siegmar.fastcsv.reader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Spliterator;
import java.util.StringJoiner;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
* This is the main class for reading CSV data.
*
* Example use:
* {@snippet :
* try (CsvReader csv = CsvReader.builder().ofCsvRecord(file)) {
* for (CsvRecord csvRecord : csv) {
* // ...
* }
* }
*}
*
* Example for named records:
* {@snippet :
* try (CsvReader csv = CsvReader.builder().ofNamedCsvRecord(file)) {
* for (NamedCsvRecord csvRecord : csv) {
* // ...
* }
* }
*}
*
* @param the type of the CSV record.
*/
public final class CsvReader implements Iterable, Closeable {
private final CsvParser csvParser;
private final CsvCallbackHandler callbackHandler;
private final CommentStrategy commentStrategy;
private final boolean skipEmptyLines;
private final boolean ignoreDifferentFieldCount;
private final CloseableIterator csvRecordIterator = new CsvRecordIterator();
private int firstRecordFieldCount = -1;
@SuppressWarnings("checkstyle:ParameterNumber")
CsvReader(final CsvParser csvParser, final CsvCallbackHandler callbackHandler,
final CommentStrategy commentStrategy, final boolean skipEmptyLines,
final boolean ignoreDifferentFieldCount) {
this.csvParser = csvParser;
this.callbackHandler = callbackHandler;
this.commentStrategy = commentStrategy;
this.skipEmptyLines = skipEmptyLines;
this.ignoreDifferentFieldCount = ignoreDifferentFieldCount;
}
/**
* Constructs a {@link CsvReaderBuilder} to configure and build instances of this class.
*
* @return a new {@link CsvReaderBuilder} instance.
*/
public static CsvReaderBuilder builder() {
return new CsvReaderBuilder();
}
/**
* Returns an iterator over elements of type {@link CsvRecord}.
*
* The returned iterator is not thread-safe.
* Don't forget to close the returned iterator when you're done.
* Alternatively, use {@link #stream()}.
*
* This method is idempotent.
*
* @return an iterator over the CSV records.
* @throws UncheckedIOException if an I/O error occurs.
* @throws CsvParseException if any other problem occurs when parsing the CSV data.
* @see #stream()
*/
@Override
public CloseableIterator iterator() {
return csvRecordIterator;
}
/**
* Returns a {@link Spliterator} over elements of type {@link CsvRecord}.
*
* The returned spliterator is not thread-safe.
* Don't forget to invoke {@link #close()} when you're done.
* Alternatively, use {@link #stream()}.
*
* This method is idempotent.
*
* @return a spliterator over the CSV records.
* @throws UncheckedIOException if an I/O error occurs.
* @throws CsvParseException if any other problem occurs when parsing the CSV data.
* @see #stream()
*/
@Override
public Spliterator spliterator() {
return new CsvSpliterator();
}
/**
* Returns a sequential {@code Stream} with this reader as its source.
*
* The returned stream is not thread-safe.
* Don't forget to close the returned stream when you're done.
*
* This method is idempotent.
*
* @return a sequential {@code Stream} over the CSV records.
* @throws UncheckedIOException if an I/O error occurs.
* @throws CsvParseException if any other problem occurs when parsing the CSV data.
* @see #iterator()
*/
public Stream stream() {
return StreamSupport.stream(spliterator(), false)
.onClose(() -> {
try {
close();
} catch (final IOException e) {
throw new UncheckedIOException(e);
}
});
}
@SuppressWarnings({
"PMD.AvoidBranchingStatementAsLastInLoop",
"PMD.AssignmentInOperand"
})
private T fetchRecord() throws IOException {
while (csvParser.parse()) {
final T csvRecord = processRecord();
if (csvRecord != null) {
return csvRecord;
}
}
callbackHandler.terminate();
return null;
}
@SuppressWarnings("checkstyle:ReturnCount")
private T processRecord() {
final RecordWrapper recordWrapper = callbackHandler.buildRecord();
// handle consumed records (e.g. header for named records)
if (recordWrapper == null) {
return null;
}
// handle comment lines
if (recordWrapper.isComment()) {
return commentStrategy == CommentStrategy.SKIP ? null : recordWrapper.getWrappedRecord();
}
// handle empty lines
if (recordWrapper.isEmptyLine()) {
return skipEmptyLines ? null : recordWrapper.getWrappedRecord();
}
// check field count consistency
if (!ignoreDifferentFieldCount) {
checkFieldCountConsistency(recordWrapper.getFieldCount());
}
return recordWrapper.getWrappedRecord();
}
private void checkFieldCountConsistency(final int fieldCount) {
// check the field count consistency on every record
if (firstRecordFieldCount == -1) {
firstRecordFieldCount = fieldCount;
} else if (fieldCount != firstRecordFieldCount) {
throw new CsvParseException(
String.format("Record %d has %d fields, but first record had %d fields",
csvParser.getStartingLineNumber(), fieldCount, firstRecordFieldCount));
}
}
@Override
public void close() throws IOException {
csvParser.close();
}
@Override
public String toString() {
return new StringJoiner(", ", CsvReader.class.getSimpleName() + "[", "]")
.add("commentStrategy=" + commentStrategy)
.add("skipEmptyLines=" + skipEmptyLines)
.add("ignoreDifferentFieldCount=" + ignoreDifferentFieldCount)
.toString();
}
@SuppressWarnings({"checkstyle:IllegalCatch", "PMD.AvoidCatchingThrowable"})
private T fetch() {
try {
return fetchRecord();
} catch (final IOException e) {
throw new UncheckedIOException(buildExceptionMessage(), e);
} catch (final Throwable t) {
throw new CsvParseException(buildExceptionMessage(), t);
}
}
private String buildExceptionMessage() {
return (csvParser.getStartingLineNumber() == 1)
? "Exception when reading first record"
: String.format("Exception when reading record that started in line %d",
csvParser.getStartingLineNumber());
}
private class CsvSpliterator implements Spliterator {
@Override
public boolean tryAdvance(final Consumer super T> action) {
final T t = fetch();
if (t != null) {
action.accept(t);
return true;
}
return false;
}
@Override
public Spliterator trySplit() {
return null;
}
@Override
public long estimateSize() {
return Long.MAX_VALUE;
}
@Override
public int characteristics() {
return ORDERED | NONNULL;
}
}
private class CsvRecordIterator implements CloseableIterator {
private T fetchedRecord;
private boolean fetched;
@Override
public boolean hasNext() {
if (!fetched) {
fetchedRecord = fetch();
fetched = true;
}
return fetchedRecord != null;
}
@Override
public T next() {
if (!fetched) {
fetchedRecord = fetch();
}
if (fetchedRecord == null) {
throw new NoSuchElementException();
}
fetched = false;
return fetchedRecord;
}
@Override
public void close() throws IOException {
CsvReader.this.close();
}
}
/**
* This builder is used to create configured instances of {@link CsvReader}. The default
* configuration of this class complies with RFC 4180:
*
* - Field separator: {@code ,} (comma)
* - Quote character: {@code "} (double quotes)
* - Comment strategy: {@link CommentStrategy#NONE} (as RFC doesn't handle comments)
* - Comment character: {@code #} (hash) (in case comment strategy is enabled)
* - Skip empty lines: {@code true}
* - Ignore different field count: {@code true}
* - Detect BOM header: {@code false}
*
*
* The line delimiter (line-feed, carriage-return or the combination of both) is detected
* automatically and thus not configurable.
*/
@SuppressWarnings({"checkstyle:HiddenField", "PMD.AvoidFieldNameMatchingMethodName"})
public static final class CsvReaderBuilder {
private char fieldSeparator = ',';
private char quoteCharacter = '"';
private CommentStrategy commentStrategy = CommentStrategy.NONE;
private char commentCharacter = '#';
private boolean skipEmptyLines = true;
private boolean ignoreDifferentFieldCount = true;
private boolean detectBomHeader;
private CsvReaderBuilder() {
}
/**
* Sets the {@code fieldSeparator} used when reading CSV data.
*
* @param fieldSeparator the field separator character (default: {@code ,} - comma).
* @return This updated object, allowing additional method calls to be chained together.
*/
public CsvReaderBuilder fieldSeparator(final char fieldSeparator) {
this.fieldSeparator = fieldSeparator;
return this;
}
/**
* Sets the {@code quoteCharacter} used when reading CSV data.
*
* @param quoteCharacter the character used to enclose fields
* (default: {@code "} - double quotes).
* @return This updated object, allowing additional method calls to be chained together.
*/
public CsvReaderBuilder quoteCharacter(final char quoteCharacter) {
this.quoteCharacter = quoteCharacter;
return this;
}
/**
* Sets the strategy that defines how (and if) commented lines should be handled
* (default: {@link CommentStrategy#NONE} as comments are not defined in RFC 4180).
*
* If a comment strategy other than {@link CommentStrategy#NONE} is used, special parsing rules are
* applied for commented lines. FastCSV defines a comment as a line that starts with a comment character.
* No (whitespace) character is allowed before the comment character. Everything after the comment character
* until the end of the line is considered the comment value.
*
* @param commentStrategy the strategy for handling comments.
* @return This updated object, allowing additional method calls to be chained together.
* @see #commentCharacter(char)
*/
public CsvReaderBuilder commentStrategy(final CommentStrategy commentStrategy) {
this.commentStrategy = commentStrategy;
return this;
}
/**
* Sets the {@code commentCharacter} used to comment lines.
*
* @param commentCharacter the character used to comment lines (default: {@code #} - hash)
* @return This updated object, allowing additional method calls to be chained together.
* @see #commentStrategy(CommentStrategy)
*/
public CsvReaderBuilder commentCharacter(final char commentCharacter) {
this.commentCharacter = commentCharacter;
return this;
}
/**
* Defines whether empty lines should be skipped when reading data.
*
* The default implementation interprets empty lines as lines that do not contain any data.
* This includes lines that consist only of opening and closing quote characters.
*
* A line that only contains whitespace characters is not considered empty.
* However, the determination of empty lines is done after field modifiers have been applied.
* If you use a field trimming modifier (like {@link FieldModifiers#TRIM}), lines that only contain whitespaces
* are considered empty.
*
* Commented lines are not considered empty lines. Use {@link #commentStrategy(CommentStrategy)} for handling
* commented lines.
*
* @param skipEmptyLines Whether empty lines should be skipped (default: {@code true}).
* @return This updated object, allowing additional method calls to be chained together.
*/
public CsvReaderBuilder skipEmptyLines(final boolean skipEmptyLines) {
this.skipEmptyLines = skipEmptyLines;
return this;
}
/**
* Defines if an {@link CsvParseException} should be thrown if records do contain a
* different number of fields.
*
* @param ignoreDifferentFieldCount if exception should be suppressed, when CSV data contains
* different field count (default: {@code true}).
* @return This updated object, allowing additional method calls to be chained together.
*/
public CsvReaderBuilder ignoreDifferentFieldCount(final boolean ignoreDifferentFieldCount) {
this.ignoreDifferentFieldCount = ignoreDifferentFieldCount;
return this;
}
/**
* Defines if an optional BOM (Byte order mark) header should be detected.
* BOM detection only applies for direct file access.
*
* Supported BOMs are: UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE.
*
* @param detectBomHeader if detection should be enabled (default: {@code false})
* @return This updated object, allowing additional method calls to be chained together.
*/
public CsvReaderBuilder detectBomHeader(final boolean detectBomHeader) {
this.detectBomHeader = detectBomHeader;
return this;
}
/**
* Constructs a new {@link CsvReader} that uses {@link CsvRecord} as record type.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, Reader)} with
* {@link CsvRecordHandler} as callback handler.
*
* @param reader the data source to read from.
* @return a new CsvReader of CsvRecord - never {@code null}.
* @throws NullPointerException if reader is {@code null}
*/
public CsvReader ofCsvRecord(final Reader reader) {
return build(new CsvRecordHandler(), reader);
}
/**
* Constructs a new {@link CsvReader} for the specified arguments.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, String)} with
* {@link CsvRecordHandler} as callback handler.
*
* @param data the data to read.
* @return a new CsvReader of CsvRecord - never {@code null}.
* @throws NullPointerException if data is {@code null}
*/
public CsvReader ofCsvRecord(final String data) {
return build(new CsvRecordHandler(), data);
}
/**
* Constructs a new {@link CsvReader} for the specified file.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, Path)} with
* {@link CsvRecordHandler} as callback handler.
*
* @param file the file to read data from.
* @return a new CsvReader of CsvRecord - never {@code null}. Don't forget to close it!
* @throws IOException if an I/O error occurs.
* @throws NullPointerException if file is {@code null}
*/
public CsvReader ofCsvRecord(final Path file) throws IOException {
return build(new CsvRecordHandler(), file);
}
/**
* Constructs a new {@link CsvReader} for the specified file.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, Path, Charset)} with
* {@link CsvRecordHandler} as callback handler.
*
* @param file the file to read data from.
* @param charset the character set to use. If BOM header detection is enabled
* (via {@link #detectBomHeader(boolean)}), this acts as a default
* when no BOM header was found.
* @return a new CsvReader of CsvRecord - never {@code null}. Don't forget to close it!
* @throws IOException if an I/O error occurs.
* @throws NullPointerException if file or charset is {@code null}
*/
public CsvReader ofCsvRecord(final Path file, final Charset charset) throws IOException {
return build(new CsvRecordHandler(), file, charset);
}
/**
* Constructs a new {@link CsvReader} that uses {@link CsvRecord} as record type.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, Reader)} with
* {@link NamedCsvRecordHandler} as callback handler.
*
* @param reader the data source to read from.
* @return a new CsvReader of CsvRecord - never {@code null}.
* @throws NullPointerException if reader is {@code null}
*/
public CsvReader ofNamedCsvRecord(final Reader reader) {
return build(new NamedCsvRecordHandler(), reader);
}
/**
* Constructs a new {@link CsvReader} for the specified arguments.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, String)} with
* {@link NamedCsvRecordHandler} as callback handler.
*
* @param data the data to read.
* @return a new CsvReader of CsvRecord - never {@code null}.
* @throws NullPointerException if data is {@code null}
*/
public CsvReader ofNamedCsvRecord(final String data) {
return build(new NamedCsvRecordHandler(), data);
}
/**
* Constructs a new {@link CsvReader} for the specified file.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, Path)} with
* {@link NamedCsvRecordHandler} as callback handler.
*
* @param file the file to read data from.
* @return a new CsvReader of CsvRecord - never {@code null}. Don't forget to close it!
* @throws IOException if an I/O error occurs.
* @throws NullPointerException if file is {@code null}
*/
public CsvReader ofNamedCsvRecord(final Path file) throws IOException {
return build(new NamedCsvRecordHandler(), file);
}
/**
* Constructs a new {@link CsvReader} for the specified file.
*
* This is a convenience method for calling {@link #build(CsvCallbackHandler, Path, Charset)} with
* {@link NamedCsvRecordHandler} as callback handler.
*
* @param file the file to read data from.
* @param charset the character set to use. If BOM header detection is enabled
* (via {@link #detectBomHeader(boolean)}), this acts as a default
* when no BOM header was found.
* @return a new CsvReader of CsvRecord - never {@code null}. Don't forget to close it!
* @throws IOException if an I/O error occurs.
* @throws NullPointerException if file or charset is {@code null}
*/
public CsvReader ofNamedCsvRecord(final Path file, final Charset charset) throws IOException {
return build(new NamedCsvRecordHandler(), file, charset);
}
/**
* Constructs a new {@link CsvReader} for the specified arguments.
*
* This library uses built-in buffering, so you do not need to pass in a buffered Reader
* implementation such as {@link java.io.BufferedReader}. Performance may be even likely
* better if you do not.
*
* Use {@link #build(CsvCallbackHandler, Path)} for optimal performance when
* reading files and {@link #build(CsvCallbackHandler, String)} when reading Strings.
*
* @param the type of the CSV record.
* @param callbackHandler the record handler to use. Do not reuse a handler after it has been used!
* @param reader the data source to read from.
* @return a new CsvReader - never {@code null}.
* @throws NullPointerException if callbackHandler or reader is {@code null}
*/
public CsvReader build(final CsvCallbackHandler callbackHandler, final Reader reader) {
Objects.requireNonNull(callbackHandler, "callbackHandler must not be null");
Objects.requireNonNull(reader, "reader must not be null");
final CsvParser csvParser = new CsvParser(fieldSeparator, quoteCharacter, commentStrategy,
commentCharacter, callbackHandler, reader);
return newReader(callbackHandler, csvParser);
}
/**
* Constructs a new {@link CsvReader} for the specified arguments.
*
* @param the type of the CSV record.
* @param callbackHandler the record handler to use. Do not reuse a handler after it has been used!
* @param data the data to read.
* @return a new CsvReader - never {@code null}.
* @throws NullPointerException if callbackHandler or data is {@code null}
*/
public CsvReader build(final CsvCallbackHandler callbackHandler, final String data) {
Objects.requireNonNull(callbackHandler, "callbackHandler must not be null");
Objects.requireNonNull(data, "data must not be null");
final CsvParser csvParser = new CsvParser(fieldSeparator, quoteCharacter, commentStrategy,
commentCharacter, callbackHandler, data);
return newReader(callbackHandler, csvParser);
}
/**
* Constructs a new {@link CsvReader} for the specified file.
*
* This is a convenience method for calling {@code of(file, StandardCharsets.UTF_8, callbackHandler)}.
*
* @param the type of the CSV record.
* @param callbackHandler the record handler to use. Do not reuse a handler after it has been used!
* @param file the file to read data from.
* @return a new CsvReader - never {@code null}. Don't forget to close it!
* @throws IOException if an I/O error occurs.
* @throws NullPointerException if callbackHandler or file is {@code null}
*/
public CsvReader build(final CsvCallbackHandler callbackHandler, final Path file) throws IOException {
return build(callbackHandler, file, StandardCharsets.UTF_8);
}
/**
* Constructs a new {@link CsvReader} for the specified arguments.
*
* @param the type of the CSV record.
* @param callbackHandler the record handler to use. Do not reuse a handler after it has been used!
* @param file the file to read data from.
* @param charset the character set to use. If BOM header detection is enabled
* (via {@link #detectBomHeader(boolean)}), this acts as a default
* when no BOM header was found.
* @return a new CsvReader - never {@code null}. Don't forget to close it!
* @throws IOException if an I/O error occurs.
* @throws NullPointerException if callbackHandler, file or charset is {@code null}
*/
public CsvReader build(final CsvCallbackHandler callbackHandler,
final Path file, final Charset charset) throws IOException {
Objects.requireNonNull(callbackHandler, "callbackHandler must not be null");
Objects.requireNonNull(file, "file must not be null");
Objects.requireNonNull(charset, "charset must not be null");
final Reader reader = detectBomHeader
? BomUtil.openReader(file, charset)
: new InputStreamReader(Files.newInputStream(file), charset);
return build(callbackHandler, reader);
}
private CsvReader newReader(final CsvCallbackHandler callbackHandler, final CsvParser csvParser) {
return new CsvReader<>(csvParser, callbackHandler,
commentStrategy, skipEmptyLines, ignoreDifferentFieldCount);
}
@Override
public String toString() {
return new StringJoiner(", ", CsvReaderBuilder.class.getSimpleName() + "[", "]")
.add("fieldSeparator=" + fieldSeparator)
.add("quoteCharacter=" + quoteCharacter)
.add("commentStrategy=" + commentStrategy)
.add("commentCharacter=" + commentCharacter)
.add("skipEmptyLines=" + skipEmptyLines)
.add("ignoreDifferentFieldCount=" + ignoreDifferentFieldCount)
.toString();
}
}
}