// com.github.ansell.csv.stream.CSVStream Maven / Gradle / Ivy
/*
* Copyright (c) 2016, Peter Ansell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.github.ansell.csv.stream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.SequenceWriter;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.fasterxml.jackson.dataformat.csv.CsvSchema.ColumnType;
/**
* Implements streaming of CSV files for both parsing and writing using Java-8
* Lambda functions such as {@link Consumer} and {@link BiFunction}..
*
* @author Peter Ansell [email protected]
*/
public final class CSVStream {
public static final int DEFAULT_HEADER_COUNT = 1;
/**
* Private constructor for static only class
*/
private CSVStream() {
}
/**
* Stream a CSV file from the given InputStream through the header
* validator, line checker, and if the line checker succeeds, send the
* checked/converted line to the consumer.
*
* @param inputStream
* The {@link InputStream} containing the CSV file.
* @param headersValidator
* The validator of the header line. Throwing
* IllegalArgumentException or other RuntimeExceptions causes the
* parsing process to short-circuit after parsing the header
* line, with a CSVStreamException being rethrown by this code.
* @param lineConverter
* The validator and converter of lines, based on the header
* line. If the lineChecker returns null, the line will not be
* passed to the writer.
* @param resultConsumer
* The consumer of the checked lines.
* @param
* The type of the results that will be created by the
* lineChecker and pushed into the writer {@link Consumer}.
* @throws IOException
* If an error occurred accessing the input.
* @throws CSVStreamException
* If an error occurred validating the input.
*/
public static void parse(final InputStream inputStream, final Consumer> headersValidator,
final BiFunction, List, T> lineConverter, final Consumer resultConsumer)
throws IOException, CSVStreamException {
try (final Reader inputStreamReader = new BufferedReader(
new InputStreamReader(inputStream, StandardCharsets.UTF_8));) {
parse(inputStreamReader, headersValidator, lineConverter, resultConsumer);
}
}
/**
* Stream a CSV file from the given Reader through the header validator,
* line checker, and if the line checker succeeds, send the
* checked/converted line to the consumer.
*
* @param reader
* The {@link Reader} containing the CSV file.
* @param headersValidator
* The validator of the header line. Throwing
* IllegalArgumentException or other RuntimeExceptions causes the
* parsing process to short-circuit after parsing the header
* line, with a CSVStreamException being rethrown by this code.
* @param lineConverter
* The validator and converter of lines, based on the header
* line. If the lineChecker returns null, the line will not be
* passed to the writer.
* @param resultConsumer
* The consumer of the checked lines.
* @param
* The type of the results that will be created by the
* lineChecker and pushed into the writer {@link Consumer}.
* @throws IOException
* If an error occurred accessing the input.
* @throws CSVStreamException
* If an error occurred validating the input.
*/
public static void parse(final Reader reader, final Consumer> headersValidator,
final BiFunction, List, T> lineConverter, final Consumer resultConsumer)
throws IOException, CSVStreamException {
parse(reader, headersValidator, lineConverter, resultConsumer, null);
}
/**
* Stream a CSV file from the given Reader through the header validator,
* line checker, and if the line checker succeeds, send the
* checked/converted line to the consumer.
*
* @param reader
* The {@link Reader} containing the CSV file.
* @param headersValidator
* The validator of the header line. Throwing
* IllegalArgumentException or other RuntimeExceptions causes the
* parsing process to short-circuit after parsing the header
* line, with a CSVStreamException being rethrown by this code.
* @param lineConverter
* The validator and converter of lines, based on the header
* line. If the lineChecker returns null, the line will not be
* passed to the writer.
* @param resultConsumer
* The consumer of the checked lines.
* @param substituteHeaders
* A substitute set of headers or null to use the headers from
* the file. If this is null the first line of the file will be
* used.
* @param
* The type of the results that will be created by the
* lineChecker and pushed into the writer {@link Consumer}.
* @throws IOException
* If an error occurred accessing the input.
* @throws CSVStreamException
* If an error occurred validating the input.
*/
public static void parse(final Reader reader, final Consumer> headersValidator,
final BiFunction, List, T> lineConverter, final Consumer resultConsumer,
final List substituteHeaders) throws IOException, CSVStreamException {
parse(reader, headersValidator, lineConverter, resultConsumer, substituteHeaders, DEFAULT_HEADER_COUNT);
}
/**
* Stream a CSV file from the given Reader through the header validator,
* line checker, and if the line checker succeeds, send the
* checked/converted line to the consumer.
*
* @param reader
* The {@link Reader} containing the CSV file.
* @param headersValidator
* The validator of the header line. Throwing
* IllegalArgumentException or other RuntimeExceptions causes the
* parsing process to short-circuit after parsing the header
* line, with a CSVStreamException being rethrown by this code.
* @param lineConverter
* The validator and converter of lines, based on the header
* line. If the lineChecker returns null, the line will not be
* passed to the writer.
* @param resultConsumer
* The consumer of the checked lines.
* @param substituteHeaders
* A substitute set of headers or null to use the headers from
* the file. If this is null and headerLineCount is set to 0, an
* IllegalArgumentException ill be thrown.
* @param headerLineCount
* The number of header lines to expect
* @param
* The type of the results that will be created by the
* lineChecker and pushed into the writer {@link Consumer}.
* @throws IOException
* If an error occurred accessing the input.
* @throws CSVStreamException
* If an error occurred validating the input.
*/
public static void parse(final Reader reader, final Consumer> headersValidator,
final BiFunction, List, T> lineConverter, final Consumer resultConsumer,
final List substituteHeaders, int headerLineCount) throws IOException, CSVStreamException {
final CsvMapper mapper = defaultMapper();
final CsvSchema schema = defaultSchema();
parse(reader, headersValidator, lineConverter, resultConsumer, substituteHeaders, headerLineCount, mapper,
schema);
}
/**
* Stream a CSV file from the given Reader through the header validator,
* line checker, and if the line checker succeeds, send the
* checked/converted line to the consumer.
*
* @param reader
* The {@link Reader} containing the CSV file.
* @param headersValidator
* The validator of the header line. Throwing
* IllegalArgumentException or other RuntimeExceptions causes the
* parsing process to short-circuit after parsing the header
* line, with a CSVStreamException being rethrown by this code.
* @param lineConverter
* The validator and converter of lines, based on the header
* line. If the lineChecker returns null, the line will not be
* passed to the writer.
* @param resultConsumer
* The consumer of the checked lines.
* @param substituteHeaders
* A substitute set of headers or null to use the headers from
* the file. If this is null and headerLineCount is set to 0, an
* IllegalArgumentException ill be thrown.
* @param headerLineCount
* The number of header lines to expect
* @param
* The type of the results that will be created by the
* lineChecker and pushed into the writer {@link Consumer}.
* @throws IOException
* If an error occurred accessing the input.
* @throws CSVStreamException
* If an error occurred validating the input.
*/
public static void parse(final Reader reader, final Consumer> headersValidator,
final BiFunction, List, T> lineConverter, final Consumer resultConsumer,
final List substituteHeaders, int headerLineCount, CsvMapper mapper, CsvSchema schema)
throws IOException, CSVStreamException {
parse(reader, headersValidator, lineConverter, resultConsumer, substituteHeaders, Collections.emptyList(),
headerLineCount, mapper, schema);
}
/**
* Stream a CSV file from the given Reader through the header validator,
* line checker, and if the line checker succeeds, send the
* checked/converted line to the consumer.
*
* @param reader
* The {@link Reader} containing the CSV file.
* @param headersValidator
* The validator of the header line. Throwing
* IllegalArgumentException or other RuntimeExceptions causes the
* parsing process to short-circuit after parsing the header
* line, with a CSVStreamException being rethrown by this code.
* @param lineConverter
* The validator and converter of lines, based on the header
* line. If the lineChecker returns null, the line will not be
* passed to the writer.
* @param resultConsumer
* The consumer of the checked lines.
* @param substituteHeaders
* A substitute set of headers or null to use the headers from
* the file. If this is null and headerLineCount is set to 0, an
* IllegalArgumentException ill be thrown.
* @param defaultValues
* A list that is either empty, signifying there are no default
* values known, or exactly the same length as each row in the
* CSV file being parsed. If the values for a field are
* empty/missing, and a non-null, non-empty value appears in this
* list, it will be substituted in when calculating the
* statistics.
* @param headerLineCount
* The number of header lines to expect
* @param
* The type of the results that will be created by the
* lineChecker and pushed into the writer {@link Consumer}.
* @throws IOException
* If an error occurred accessing the input.
* @throws CSVStreamException
* If an error occurred validating the input.
*/
public static void parse(final Reader reader, final Consumer> headersValidator,
final BiFunction, List, T> lineConverter, final Consumer resultConsumer,
final List substituteHeaders, final List defaultValues, int headerLineCount,
CsvMapper mapper, CsvSchema schema) throws IOException, CSVStreamException {
if (headerLineCount < 0) {
throw new IllegalArgumentException("Header line count must be non-negative.");
}
if (headerLineCount < 1 && substituteHeaders == null) {
throw new IllegalArgumentException(
"If there are no header lines, a substitute set of headers must be defined.");
}
List headers = substituteHeaders;
if (headers != null) {
try {
headersValidator.accept(headers);
} catch (final Exception e) {
throw new CSVStreamException("Could not verify substituted headers for csv file", e);
}
}
final Function, List> defaultValueReplacer;
// Trivial non-replacer if there were no default values set
if (defaultValues.isEmpty()) {
defaultValueReplacer = l -> l;
} else {
defaultValueReplacer = l -> {
List changedResult = null;
for (int i = 0; i < l.size(); i++) {
if (l.get(i).isEmpty() && !defaultValues.get(i).isEmpty()) {
if (changedResult == null) {
changedResult = new ArrayList<>(l);
}
changedResult.set(i, defaultValues.get(i));
}
}
if (changedResult == null) {
return l;
} else {
return changedResult;
}
};
}
int lineCount = 0;
try (final MappingIterator> it = mapper.readerFor(List.class).with(schema).readValues(reader);) {
while (it.hasNext()) {
List nextLine = it.next();
if (headers == null) {
headers = nextLine.stream().map(v -> v.trim()).map(v -> v.intern()).collect(Collectors.toList());
try {
headersValidator.accept(headers);
} catch (final Exception e) {
throw new CSVStreamException("Could not verify headers for csv file", e);
}
// Default values must either be empty or the exact length
// that the headers (possibly substituteHeaders) were
if (!defaultValues.isEmpty() && headers.size() != defaultValues.size()) {
throw new CSVStreamException(
"Default values list must have the same number of items as the headers: expected "
+ headers.size() + ", found " + defaultValues.size() + " headers=" + headers
+ " defaultValues=" + defaultValues);
}
} else if (lineCount >= headerLineCount) {
if (nextLine.size() != headers.size()) {
throw new CSVStreamException("Line and header sizes were different: expected " + headers.size()
+ ", found " + nextLine.size() + " headers=" + headers + " line=" + nextLine);
}
final List defaultReplacedLine = defaultValueReplacer.apply(nextLine);
final T apply = lineConverter.apply(headers, defaultReplacedLine);
// Line checker returning null indicates that a value was
// not found, and will not be sent to the consumer.
if (apply != null) {
resultConsumer.accept(apply);
}
}
lineCount++;
}
} catch (IOException | CSVStreamException e) {
throw e;
} catch (Exception e) {
throw new CSVStreamException(e);
}
if (headers == null) {
throw new CSVStreamException("CSV file did not contain a valid header line");
}
}
/**
* Writes objects from the given {@link Stream} to the given {@link Writer}
* in CSV format, converting them to a {@link List} of String's using the
* given {@link BiFunction}.
*
* @param writer
* The Writer that will receive the CSV file.
* @param objects
* The Stream of objects to be written
* @param headers
* The headers to use for the resulting CSV file.
* @param objectConverter
* The function to convert an individual object to a line in the
* resulting CSV file, represented as a List of String's.
* @param
* The type of the objects to be converted.
* @throws IOException
* If an error occurred accessing the output stream.
* @throws CSVStreamException
* If an error occurred converting or serialising the objects.
*/
public static void write(final Writer writer, final Stream objects, final List headers,
final BiFunction, T, List> objectConverter) throws IOException, CSVStreamException {
write(writer, objects, buildSchema(headers), objectConverter);
}
/**
* Writes objects from the given {@link Stream} to the given {@link Writer}
* in CSV format, converting them to a {@link List} of String's using the
* given {@link BiFunction}.
*
* @param writer
* The Writer that will receive the CSV file.
* @param objects
* The Stream of objects to be written
* @param schema
* The {@link CsvSchema} to use for the resulting CSV file.
* @param objectConverter
* The function to convert an individual object to a line in the
* resulting CSV file, represented as a List of String's.
* @param
* The type of the objects to be converted.
* @throws IOException
* If an error occurred accessing the output stream.
* @throws CSVStreamException
* If an error occurred converting or serialising the objects.
*/
public static void write(final Writer writer, final Stream objects, final CsvSchema schema,
final BiFunction, T, List> objectConverter) throws IOException, CSVStreamException {
List headers = new ArrayList<>();
schema.iterator().forEachRemaining(c -> headers.add(c.getName()));
try (SequenceWriter csvWriter = newCSVWriter(writer, headers);) {
objects.forEachOrdered(o -> {
try {
csvWriter.write(objectConverter.apply(headers, o));
} catch (Exception e) {
throw new CSVStreamException("Could not write object out", e);
}
});
}
}
/**
* Returns a Jackson {@link SequenceWriter} which will write CSV lines to
* the given {@link OutputStream} using the headers provided.
*
* @param outputStream
* The writer which will receive the CSV file.
* @param headers
* The column headers that will be used by the returned Jackson
* {@link SequenceWriter}.
* @return A Jackson {@link SequenceWriter} that can have
* {@link SequenceWriter#write(Object)} called on it to emit CSV
* lines to the given {@link OutputStream}.
* @throws IOException
* If there is a problem writing the CSV header line to the
* {@link OutputStream}.
*/
public static SequenceWriter newCSVWriter(final OutputStream outputStream, List headers)
throws IOException {
return newCSVWriter(outputStream, buildSchema(headers));
}
/**
* Returns a Jackson {@link SequenceWriter} which will write CSV lines to
* the given {@link OutputStream} using the {@link CsvSchema}.
*
* @param outputStream
* The writer which will receive the CSV file.
* @param schema
* The {@link CsvSchema} that will be used by the returned
* Jackson {@link SequenceWriter}.
* @return A Jackson {@link SequenceWriter} that can have
* {@link SequenceWriter#write(Object)} called on it to emit CSV
* lines to the given {@link OutputStream}.
* @throws IOException
* If there is a problem writing the CSV header line to the
* {@link OutputStream}.
*/
public static SequenceWriter newCSVWriter(final OutputStream outputStream, CsvSchema schema) throws IOException {
return defaultMapper().writerWithDefaultPrettyPrinter().with(schema).forType(List.class)
.writeValues(outputStream);
}
/**
* Returns a Jackson {@link SequenceWriter} which will write CSV lines to
* the given {@link Writer} using the headers provided.
*
* @param writer
* The writer which will receive the CSV file.
* @param headers
* The column headers that will be used by the returned Jackson
* {@link SequenceWriter}.
* @return A Jackson {@link SequenceWriter} that can have
* {@link SequenceWriter#write(Object)} called on it to emit CSV
* lines to the given {@link Writer}.
* @throws IOException
* If there is a problem writing the CSV header line to the
* {@link Writer}.
*/
public static SequenceWriter newCSVWriter(final Writer writer, List headers) throws IOException {
return newCSVWriter(writer, buildSchema(headers));
}
/**
* Returns a Jackson {@link SequenceWriter} which will write CSV lines to
* the given {@link Writer} using the {@link CsvSchema}.
*
* @param writer
* The writer which will receive the CSV file.
* @param schema
* The {@link CsvSchema} that will be used by the returned
* Jackson {@link SequenceWriter}.
* @return A Jackson {@link SequenceWriter} that can have
* {@link SequenceWriter#write(Object)} called on it to emit CSV
* lines to the given {@link Writer}.
* @throws IOException
* If there is a problem writing the CSV header line to the
* {@link Writer}.
*/
public static SequenceWriter newCSVWriter(final Writer writer, CsvSchema schema) throws IOException {
return defaultMapper().writerWithDefaultPrettyPrinter().with(schema).forType(List.class).writeValues(writer);
}
/**
* Build a {@link CsvSchema} object using the given headers.
*
* @param headers
* The list of strings in the header.
* @return A {@link CsvSchema} object including the given header items.
*/
public static CsvSchema buildSchema(List headers) {
return CsvSchema.builder().addColumns(headers, ColumnType.STRING).setUseHeader(true).build();
}
/**
* Returns a {@link CsvMapper} that contains the default settings used by
* csvstream.
*
* @return A new {@link CsvMapper} setup to match the defaults used by
* csvstream
*/
public static CsvMapper defaultMapper() {
final CsvMapper mapper = new CsvMapper();
mapper.enable(CsvParser.Feature.TRIM_SPACES);
mapper.enable(CsvParser.Feature.WRAP_AS_ARRAY);
mapper.configure(JsonParser.Feature.ALLOW_YAML_COMMENTS, true);
return mapper;
}
/**
* Returns a {@link CsvSchema} that contains the default settings used by
* csvstream.
*
* @return A new {@link CsvSchema} setup to match the defaults used by
* csvstream
*/
public static CsvSchema defaultSchema() {
return CsvSchema.emptySchema();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy