// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// com.github.ansell.csv.stream.CSVStream Maven / Gradle / Ivy

/*
 * Copyright (c) 2016, Peter Ansell
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * 
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package com.github.ansell.csv.stream;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.SequenceWriter;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.fasterxml.jackson.dataformat.csv.CsvSchema.ColumnType;

/**
 * Implements streaming of CSV files for both parsing and writing using Java-8
 * Lambda functions such as {@link Consumer} and {@link BiFunction}.
 * 
 * @author Peter Ansell [email protected]
 */
public final class CSVStream {

	/**
	 * Private constructor for static only class
	 */
	private CSVStream() {
	}

	/**
	 * Stream a CSV file from the given InputStream through the header
	 * validator, line checker, and if the line checker succeeds, send the
	 * checked/converted line to the consumer.
	 * 
	 * @param inputStream
	 *            The {@link InputStream} containing the CSV file.
	 * @param headersValidator
	 *            The validator of the header line. Throwing
	 *            IllegalArgumentException or other RuntimeExceptions causes the
	 *            parsing process to short-circuit after parsing the header
	 *            line, with a CSVStreamException being rethrown by this code.
	 * @param lineConverter
	 *            The validator and converter of lines, based on the header
	 *            line. If the lineChecker returns null, the line will not be
	 *            passed to the writer.
	 * @param resultConsumer
	 *            The consumer of the checked lines.
	 * @param 
	 *            The type of the results that will be created by the
	 *            lineChecker and pushed into the writer {@link Consumer}.
	 * @throws IOException
	 *             If an error occurred accessing the input.
	 * @throws CSVStreamException
	 *             If an error occurred validating the input.
	 */
	public static  void parse(final InputStream inputStream, final Consumer> headersValidator,
			final BiFunction, List, T> lineConverter, final Consumer resultConsumer)
			throws IOException, CSVStreamException {
		try (final InputStreamReader inputStreamReader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);) {
			parse(inputStreamReader, headersValidator, lineConverter, resultConsumer);
		}
	}

	/**
	 * Stream a CSV file from the given Reader through the header validator,
	 * line checker, and if the line checker succeeds, send the
	 * checked/converted line to the consumer.
	 * 
	 * @param reader
	 *            The {@link Reader} containing the CSV file.
	 * @param headersValidator
	 *            The validator of the header line. Throwing
	 *            IllegalArgumentException or other RuntimeExceptions causes the
	 *            parsing process to short-circuit after parsing the header
	 *            line, with a CSVStreamException being rethrown by this code.
	 * @param lineConverter
	 *            The validator and converter of lines, based on the header
	 *            line. If the lineChecker returns null, the line will not be
	 *            passed to the writer.
	 * @param resultConsumer
	 *            The consumer of the checked lines.
	 * @param 
	 *            The type of the results that will be created by the
	 *            lineChecker and pushed into the writer {@link Consumer}.
	 * @throws IOException
	 *             If an error occurred accessing the input.
	 * @throws CSVStreamException
	 *             If an error occurred validating the input.
	 */
	public static  void parse(final Reader reader, final Consumer> headersValidator,
			final BiFunction, List, T> lineConverter, final Consumer resultConsumer)
			throws IOException, CSVStreamException {
		final CsvMapper mapper = new CsvMapper();
		mapper.enable(CsvParser.Feature.TRIM_SPACES);
		mapper.enable(CsvParser.Feature.WRAP_AS_ARRAY);
		mapper.configure(JsonParser.Feature.ALLOW_YAML_COMMENTS, true);

		List headers = null;

		try (final MappingIterator> it = mapper.readerFor(List.class).readValues(reader);) {
			while (it.hasNext()) {
				List nextLine = it.next();
				if (headers == null) {
					headers = nextLine.stream().map(v -> v.trim()).map(v -> v.intern()).collect(Collectors.toList());
					try {
						headersValidator.accept(headers);
					} catch (final Exception e) {
						throw new CSVStreamException("Could not verify headers for csv file", e);
					}
				} else {
					if (nextLine.size() != headers.size()) {
						throw new CSVStreamException(
								"Line and header sizes were different: " + headers + " " + nextLine);
					}

					final T apply = lineConverter.apply(headers, nextLine);

					// Line checker returning null indicates that a value was
					// not found, and will not be sent to the consumer.
					if (apply != null) {
						resultConsumer.accept(apply);
					}
				}
			}
		} catch (Exception e) {
			throw new CSVStreamException(e);
		}

		if (headers == null) {
			throw new CSVStreamException("CSV file did not contain a valid header line");
		}
	}

	/**
	 * Writes objects from the given {@link Stream} to the given {@link Writer}
	 * in CSV format, converting them to a {@link List} of String's using the
	 * given {@link BiFunction}.
	 * 
	 * @param writer
	 *            The Writer that will receive the CSV file.
	 * @param objects
	 *            The Stream of objects to be written
	 * @param headers
	 *            The headers to use for the resulting CSV file.
	 * @param objectConverter
	 *            The function to convert an individual object to a line in the
	 *            resulting CSV file, represented as a List of String's.
	 * @param 
	 *            The type of the objects to be converted.
	 * @throws IOException
	 *             If an error occurred accessing the output stream.
	 * @throws CSVStreamException
	 *             If an error occurred converting or serialising the objects.
	 */
	public static  void write(final Writer writer, final Stream objects, final List headers,
			final BiFunction, T, List> objectConverter) throws IOException, CSVStreamException {
		try (SequenceWriter csvWriter = newCSVWriter(writer, headers);) {
			objects.forEachOrdered(o -> {
				try {
					csvWriter.write(objectConverter.apply(headers, o));
				} catch (Exception e) {
					throw new CSVStreamException("Could not write object out", e);
				}
			});
		}
	}

	/**
	 * Returns a Jackson {@link SequenceWriter} which will write CSV lines to
	 * the given {@link OutputStream} using the headers provided.
	 * 
	 * @param outputStream
	 *            The writer which will receive the CSV file.
	 * @param headers
	 *            The column headers that will be used by the returned Jackson
	 *            {@link SequenceWriter}.
	 * @return A Jackson {@link SequenceWriter} that can have
	 *         {@link SequenceWriter#write(Object)} called on it to emit CSV
	 *         lines to the given {@link OutputStream}.
	 * @throws IOException
	 *             If there is a problem writing the CSV header line to the
	 *             {@link OutputStream}.
	 */
	public static SequenceWriter newCSVWriter(final OutputStream outputStream, List headers) throws IOException {
		return newCSVWriter(outputStream, buildSchema(headers));
	}

	/**
	 * Returns a Jackson {@link SequenceWriter} which will write CSV lines to
	 * the given {@link OutputStream} using the {@link CsvSchema}.
	 * 
	 * @param outputStream
	 *            The writer which will receive the CSV file.
	 * @param schema
	 *            The {@link CsvSchema} that will be used by the returned
	 *            Jackson {@link SequenceWriter}.
	 * @return A Jackson {@link SequenceWriter} that can have
	 *         {@link SequenceWriter#write(Object)} called on it to emit CSV
	 *         lines to the given {@link OutputStream}.
	 * @throws IOException
	 *             If there is a problem writing the CSV header line to the
	 *             {@link OutputStream}.
	 */
	public static SequenceWriter newCSVWriter(final OutputStream outputStream, CsvSchema schema) throws IOException {
		return new CsvMapper().writerWithDefaultPrettyPrinter().with(schema).forType(List.class)
				.writeValues(outputStream);
	}

	/**
	 * Returns a Jackson {@link SequenceWriter} which will write CSV lines to
	 * the given {@link Writer} using the headers provided.
	 * 
	 * @param writer
	 *            The writer which will receive the CSV file.
	 * @param headers
	 *            The column headers that will be used by the returned Jackson
	 *            {@link SequenceWriter}.
	 * @return A Jackson {@link SequenceWriter} that can have
	 *         {@link SequenceWriter#write(Object)} called on it to emit CSV
	 *         lines to the given {@link Writer}.
	 * @throws IOException
	 *             If there is a problem writing the CSV header line to the
	 *             {@link Writer}.
	 */
	public static SequenceWriter newCSVWriter(final Writer writer, List headers) throws IOException {
		return newCSVWriter(writer, buildSchema(headers));
	}

	/**
	 * Returns a Jackson {@link SequenceWriter} which will write CSV lines to
	 * the given {@link Writer} using the {@link CsvSchema}.
	 * 
	 * @param writer
	 *            The writer which will receive the CSV file.
	 * @param schema
	 *            The {@link CsvSchema} that will be used by the returned
	 *            Jackson {@link SequenceWriter}.
	 * @return A Jackson {@link SequenceWriter} that can have
	 *         {@link SequenceWriter#write(Object)} called on it to emit CSV
	 *         lines to the given {@link Writer}.
	 * @throws IOException
	 *             If there is a problem writing the CSV header line to the
	 *             {@link Writer}.
	 */
	public static SequenceWriter newCSVWriter(final Writer writer, CsvSchema schema) throws IOException {
		return new CsvMapper().writerWithDefaultPrettyPrinter().with(schema).forType(List.class).writeValues(writer);
	}

	/**
	 * Build a {@link CsvSchema} object using the given headers.
	 * 
	 * @param headers
	 *            The list of strings in the header.
	 * @return A {@link CsvSchema} object including the given header items.
	 */
	public static CsvSchema buildSchema(List headers) {
		return CsvSchema.builder().addColumns(headers, ColumnType.STRING).setUseHeader(true).build();
	}

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy