// org.sfm.csv.CsvParser Maven / Gradle / Ivy
// Show all versions of simpleFlatMapper Show documentation
package org.sfm.csv;
import org.sfm.csv.impl.CsvColumnDefinitionProviderImpl;
import org.sfm.csv.impl.DynamicCsvMapper;
import org.sfm.csv.parser.*;
import org.sfm.map.CaseInsensitiveFieldKeyNamePredicate;
import org.sfm.reflect.ReflectionService;
import org.sfm.reflect.TypeReference;
import org.sfm.reflect.meta.ClassMeta;
import org.sfm.tuples.*;
import org.sfm.utils.Predicate;
import org.sfm.utils.RowHandler;
import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.Type;
import java.util.*;
//IFJAVA8_START
import java.util.stream.Stream;
//IFJAVA8_END
/**
* CsvParser provides a fluent DSL to parse or map csv content.
* It is possible to customize the quote char and the separator, to skip lines, and to specify the size of the buffer.
*
*
* <pre>{@code
* CsvParser
*     .quote('\'')
*     .separator(';')
*     .skip(2)
*     .bufferSize(256)
*     .stream(new StringReader("1;1\n2;2\n3;3\n4;4\n5;5"))
*     .map(Arrays::toString)
*     .forEach(System.out::println);
* // output
* // [3, 3]
* // [4, 4]
* // [5, 5]
* }</pre>
*
*
*
* The limit setting does not affect streams or iterators; it only applies to parse on the DSL and to forEach on the mapTo/mapWith DSL.
*
* The DSL provides way to mapTo an object
*
* CsvParser.mapTo(MyClass.class).stream(reader).forEach(System.out::println);
*
*
* using static mapping when no headers
*
* CsvParser.mapTo(MyClass.class).headers("id", "field").stream(reader).forEach(System.out::println);
* // using the addMapping
* CsvParser.mapTo(MyClass.class).addMapping("id").addMapping("field").stream(reader).forEach(System.out::println);
*
*
* using static mapping and ignoring source header
*
* CsvParser.mapTo(MyClass.class).overrideHeaders("id", "field").stream(reader).forEach(System.out::println);
* // using the addMapping
* CsvParser.skip(1).mapTo(MyClass.class).addMapping("id").addMapping("field").stream(reader).forEach(System.out::println);
*
*
* or mapping with a predefined mapper.
*
* CsvMapper<MyClass> mapper = CsvMapperFactory.newInstance().newMapper(MyClass.class);
* CsvParser.mapWith(mapper).stream(reader).forEach(System.out::println);
*
*
*
* Each call to the DSL returns an immutable representation of the current setup, so it is possible to cache any step of the DSL without side effects.
*
* CsvParser.DSL semiColumnParser = CsvParser.separator(';');
*
* try (Reader r = new FileReader(file)) {
* // limit(3) returns a new DSL; the semiColumnParser object itself is not modified
* semiColumnParser.limit(3).stream(r);
* }
*
*
*
*/
public final class CsvParser {
/**
*
* @param c the separator char
* @return the DSL object
*/
public static DSL separator(char c) {
return schema().separator(c);
}
public static DSL bufferSize(int size) {
return schema().bufferSize(size);
}
public static DSL quote(char c) {
return schema().quote(c);
}
public static DSL skip(int skip) {
return schema().skip(skip);
}
private static DSL schema() {
return new DSL();
}
public static DSL limit(int limit) {
return schema().limit(limit);
}
public static MapToDSL mapTo(Type type) {
return schema().mapTo(type);
}
public static MapToDSL mapTo(Class type) {
return schema().mapTo(type);
}
public static MapToDSL mapTo(TypeReference type) {
return schema().mapTo(type);
}
public static MapToDSL> mapTo(Class class1, Class class2) {
return schema().mapTo(class1, class2);
}
public static MapToDSL> mapTo(Class class1, Class class2, Class class3) {
return schema().mapTo(class1, class2, class3);
}
public static MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4) {
return schema().mapTo(class1, class2, class3, class4);
}
public static MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5) {
return schema().mapTo(class1, class2, class3, class4, class5);
}
public static MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5, Class class6) {
return schema().mapTo(class1, class2, class3, class4, class5, class6);
}
public static MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5, Class class6, Class class7) {
return schema().mapTo(class1, class2, class3, class4, class5, class6, class7);
}
public static MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5, Class class6, Class class7, Class class8) {
return schema().mapTo(class1, class2, class3, class4, class5, class6, class7, class8);
}
public static MapWithDSL mapWith(CsvMapper mapper) {
return schema().mapWith(mapper);
}
/**
* @param reader the reader
* @return a csv reader based on the default setup.
* @throws java.io.IOException if an error occurs reading the data
*/
public static CsvReader reader(Reader reader) throws IOException {
return schema().reader(reader);
}
@SuppressWarnings("deprecation")
@Deprecated
public static Iterator iterate(Reader reader) throws IOException {
return schema().iterate(reader);
}
@SuppressWarnings("deprecation")
public static Iterator iterator(Reader reader) throws IOException {
return iterate(reader);
}
public static CC parse(Reader reader, CC cellConsumer) throws IOException {
return schema().parse(reader, cellConsumer);
}
//IFJAVA8_START
public static Stream stream(Reader r) throws IOException {
return schema().stream(r);
}
//IFJAVA8_END
/**
* DSL for csv parsing.
* @see org.sfm.csv.CsvParser
*/
public static final class DSL {
private final char separatorChar;
private final char quoteChar;
private final int bufferSize;
private final int skip;
private final int limit;
private DSL() {
separatorChar = ',';
quoteChar= '"';
bufferSize = 8192;
skip = 0;
limit = -1;
}
public DSL(char separatorChar, char quoteChar, int bufferSize, int skip, int limit) {
this.separatorChar = separatorChar;
this.quoteChar = quoteChar;
this.bufferSize = bufferSize;
this.skip = skip;
this.limit = limit;
}
/**
* set the separator character. the default value is ','.
* @param c the new separator character
* @return this
*/
public DSL separator(char c) {
return new DSL(c, quoteChar, bufferSize, skip, limit);
}
/**
* set the quote character. the default value is '"'.
* @param c the quote character
* @return this
*/
public DSL quote(char c) {
return new DSL(separatorChar, c, bufferSize, skip, limit);
}
/**
* set the size of the char buffer to read from.
* @param size the size in bytes
* @return this
*/
public DSL bufferSize(int size) {
return new DSL(separatorChar, quoteChar, size, skip, limit);
}
/**
* set the number of line to skip.
* @param skip number of line to skip.
* @return this
*/
public DSL skip(int skip) {
return new DSL(separatorChar, quoteChar, bufferSize, skip, limit);
}
/**
* set the number of row to process. limit does not affect stream or iterator.
* @param limit number of row to process
* @return this
*/
public DSL limit(int limit) {
return new DSL(separatorChar, quoteChar, bufferSize, skip, limit);
}
/**
* Parse the content from the reader as a csv and call back the cellConsumer with the cell values.
* @param reader the reader
* @param cellConsumer the callback object for each cell value
* @param the type of the cell consumer
* @return cellConsumer
* @throws java.io.IOException if and error occurs in the reader
*/
public CC parse(Reader reader, CC cellConsumer) throws IOException {
CsvReader csvreader = reader(reader);
if (limit == -1) {
return csvreader.parseAll(cellConsumer);
} else {
return csvreader.parseRows(cellConsumer, limit);
}
}
/**
* Create a CsvReader and the specified reader. Will skip the number of specified rows.
* @param reader the content
* @return a CsvReader on the reader.
* @throws java.io.IOException if an io error occurs
*/
public CsvReader reader(Reader reader) throws IOException {
CsvReader csvReader = new CsvReader(reader, charConsumer());
csvReader.skipRows(skip);
return csvReader;
}
@Deprecated
public Iterator iterate(Reader reader) throws IOException {
return reader(reader).iterator();
}
@SuppressWarnings("deprecation")
public Iterator iterator(Reader reader) throws IOException {
return iterate(reader);
}
public MapToDSL mapTo(Type target) {
return new MapToDSL(this, target);
}
public MapToDSL mapTo(Class target) {
return mapTo((Type)target);
}
public MapToDSL mapTo(TypeReference target) {
return mapTo(target.getType());
}
public MapToDSL> mapTo(Class class1, Class class2) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2));
}
public MapToDSL> mapTo(Class class1, Class class2, Class class3) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2, class3));
}
public MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2, class3, class4));
}
public MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2, class3, class4, class5));
}
public MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5, Class class6) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2, class3, class4, class5, class6));
}
public MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5, Class class6, Class class7) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2, class3, class4, class5, class6, class7));
}
public MapToDSL> mapTo(Class class1, Class class2, Class class3, Class class4, Class class5, Class class6, Class class7, Class class8) {
return new MapToDSL>(this, Tuples.typeDef(class1, class2, class3, class4, class5, class6, class7, class8));
}
public MapWithDSL mapWith(CsvMapper mapper) {
return new MapWithDSL(this, mapper);
}
//IFJAVA8_START
public Stream stream(Reader reader) throws IOException {
return reader(reader).stream();
}
//IFJAVA8_END
private CsvCharConsumer charConsumer() {
CharBuffer charBuffer = new CharBuffer(bufferSize);
if (separatorChar == ',' && quoteChar == '"') {
return new StandardCsvCharConsumer(charBuffer);
} else {
return new ConfigurableCsvCharConsumer(charBuffer, separatorChar, quoteChar);
}
}
public int bufferSize() {
return bufferSize;
}
public int limit() {
return limit;
}
public int skip() {
return skip;
}
public char separator() {
return separatorChar;
}
public char quote() {
return quoteChar;
}
}
/**
* DSL for csv mapping to a dynamic mapper.
* @see org.sfm.csv.CsvParser
* @see org.sfm.csv.CsvMapper
*/
public static final class MapToDSL extends MapWithDSL {
private final ClassMeta classMeta;
private final Type mapToClass;
private final CsvColumnDefinitionProviderImpl columnDefinitionProvider;
public MapToDSL(DSL dsl, Type mapToClass) {
this(dsl, ReflectionService.newInstance().getClassMeta(mapToClass), mapToClass, new CsvColumnDefinitionProviderImpl());
}
private MapToDSL(DSL dsl, ClassMeta classMeta, Type mapToClass, CsvColumnDefinitionProviderImpl columnDefinitionProvider) {
super(dsl, new DynamicCsvMapper(mapToClass, classMeta, columnDefinitionProvider));
this.mapToClass = mapToClass;
this.classMeta = classMeta;
this.columnDefinitionProvider = columnDefinitionProvider;
}
public StaticMapToDSL headers(String... headers) {
return new StaticMapToDSL(getDsl(), classMeta, mapToClass, getColumnDefinitions(headers), columnDefinitionProvider);
}
public StaticMapToDSL defaultHeaders() {
return headers(classMeta.generateHeaders());
}
public StaticMapToDSL overrideHeaders(String... headers) {
List> columns = getColumnDefinitions(headers);
return new StaticMapToDSL(getDsl().skip(1), classMeta, mapToClass, columns, columnDefinitionProvider);
}
private List> getColumnDefinitions(String[] headers) {
List> columns = new ArrayList>();
for(String header : headers) {
columns.add(new Tuple2(header, CsvColumnDefinition.identity()));
}
return columns;
}
public MapToDSL columnDefinition(String column, CsvColumnDefinition columnDefinition) {
return columnDefinition(new CaseInsensitiveFieldKeyNamePredicate(column), columnDefinition);
}
public MapToDSL columnDefinition(Predicate super CsvColumnKey> predicate, CsvColumnDefinition columnDefinition) {
return new MapToDSL(getDsl(), classMeta, mapToClass, newColumnDefinitionProvider(predicate, columnDefinition));
}
public MapWithDSL addKeys(String... keys) {
List, CsvColumnDefinition>> definitions = columnDefinitionProvider.getDefinitions();
for(String key : keys) {
definitions.add(new Tuple2, CsvColumnDefinition>(new CaseInsensitiveFieldKeyNamePredicate(key),
CsvColumnDefinition.key()));
}
return new MapToDSL(getDsl(), classMeta, mapToClass, new CsvColumnDefinitionProviderImpl(definitions));
}
private CsvColumnDefinitionProviderImpl newColumnDefinitionProvider(Predicate super CsvColumnKey> predicate, CsvColumnDefinition columnDefinition) {
List, CsvColumnDefinition>> definitions = columnDefinitionProvider.getDefinitions();
definitions.add(new Tuple2, CsvColumnDefinition>(predicate, columnDefinition));
return new CsvColumnDefinitionProviderImpl(definitions);
}
public StaticMapToDSL overrideWithDefaultHeaders() {
return overrideHeaders(classMeta.generateHeaders());
}
public StaticMapToDSL addMapping(String column) {
return staticMapper().addMapping(column);
}
public StaticMapToDSL addKey(String key) {
return staticMapper().addKey(key);
}
public StaticMapToDSL addMapping(String column, CsvColumnDefinition columnDefinition) {
return staticMapper().addMapping(column, columnDefinition);
}
private StaticMapToDSL staticMapper() {
return new StaticMapToDSL(getDsl().skip(1), classMeta, mapToClass, Collections.>emptyList(), columnDefinitionProvider);
}
}
/**
* DSL for csv mapping to a static mapper.
* @see org.sfm.csv.CsvParser
* @see org.sfm.csv.CsvMapper
*/
public static final class StaticMapToDSL extends MapWithDSL {
private final ClassMeta classMeta;
private final Type mapToClass;
private final CsvColumnDefinitionProviderImpl columnDefinitionProvider;
private final List> columns;
private StaticMapToDSL(DSL dsl, ClassMeta classMeta, Type mapToClass, List> columns, CsvColumnDefinitionProviderImpl columnDefinitionProvider) {
super(dsl, newStaticMapper(mapToClass, classMeta, columns, columnDefinitionProvider));
this.classMeta = classMeta;
this.mapToClass = mapToClass;
this.columns = columns;
this.columnDefinitionProvider = columnDefinitionProvider;
}
private static CsvMapper newStaticMapper(Type mapToClass, ClassMeta classMeta, List> columns, CsvColumnDefinitionProviderImpl columnDefinitionProvider) {
CsvMapperBuilder builder = new CsvMapperBuilder(mapToClass, classMeta, columnDefinitionProvider);
for(Tuple2 col: columns) {
builder.addMapping(col.first(), col.second());
}
return builder.mapper();
}
public StaticMapToDSL addMapping(String column) {
return addMapping(column, CsvColumnDefinition.identity());
}
public StaticMapToDSL addMapping(String column, CsvColumnDefinition columnDefinition) {
List> newColumns = new ArrayList>(columns);
newColumns.add(new Tuple2(column, columnDefinition));
return new StaticMapToDSL(getDsl(), classMeta, mapToClass, newColumns, columnDefinitionProvider);
}
public StaticMapToDSL addKey(String key) {
return addMapping(key, CsvColumnDefinition.key());
}
}
/**
* DSL for csv mapping to a provided mapper.
* @see org.sfm.csv.CsvParser
* @see org.sfm.csv.CsvMapper
*/
public static class MapWithDSL {
private final DSL dsl;
private final CsvMapper mapper;
public MapWithDSL(DSL dsl, CsvMapper mapper) {
this.dsl = dsl;
this.mapper = mapper;
}
protected final DSL getDsl() {
return dsl;
}
protected final CsvMapper getMapper() {
return mapper;
}
@Deprecated
public final Iterator iterate(Reader reader) throws IOException {
return mapper.iterator(dsl.reader(reader));
}
@SuppressWarnings("deprecation")
public final Iterator iterator(Reader reader) throws IOException {
return iterate(reader);
}
public final > H forEach(Reader reader, H rowHandler) throws IOException {
if (dsl.limit == -1) {
mapper.forEach(dsl.reader(reader), rowHandler);
} else {
mapper.forEach(dsl.reader(reader), rowHandler, dsl.limit);
}
return rowHandler;
}
//IFJAVA8_START
public final Stream stream(Reader reader) throws IOException {
return mapper.stream(dsl.reader(reader));
}
//IFJAVA8_END
}
// public static void main(String[] args) throws IOException {
// CsvParser
// .quote('\'')
// .separator(';')
// .skip(2)
// .bufferSize(256)
// .stream(new StringReader("1;1\n2;2\n3;3\n4;4\n5;5"))
// .map(Arrays::toString)
// .forEach(System.out::println);
// }
}