
net.optionfactory.csv.parser.DefaultCsvParser Maven / Gradle / Ivy
The newest version!
package net.optionfactory.csv.parser;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.optionfactory.csv.parser.CsvParserSpi.Record;
import net.optionfactory.csv.parser.javacc.ParseException;
public class DefaultCsvParser implements CsvParser {
@Override
public CsvParsingResult parse(Separator format, Trimming trimming, RecordsTrimming recordsTrimming, String sourceName, Reader reader) {
final CsvParserSpi parser = format.parser(reader);
final CsvParsingResult result = CsvParsingResult.empty();
try {
final Record record = parser.record(trimming);
if (record == null) {
result.problems.add(CsvParsingProblems.noSchema(sourceName));
return result;
}
result.schema = record.values;
final CsvParsingResult dataResult = parse(format, trimming, recordsTrimming, sourceName, parser, result.schema);
result.problems = dataResult.problems;
result.records = dataResult.records;
return result;
} catch (ParseException ex) {
final Source s = Source.of(ex.currentToken.beginLine, ex.currentToken.beginColumn, ex.currentToken.endLine, ex.currentToken.endColumn);
final List> expectedOneOf = new ArrayList<>();
for (int[] expectedTokenSequence : ex.expectedTokenSequences) {
final List expected = new ArrayList<>();
for (int e : expectedTokenSequence) {
expected.add(format.tokenImage(e));
}
expectedOneOf.add(expected);
}
result.problems.add(CsvParsingProblems.unparseableSchema(sourceName, s, ex.currentToken.image, expectedOneOf));
return result;
}
}
@Override
public CsvParsingResult parse(Separator f, Trimming trimming, RecordsTrimming recordsTrimming, String sourceName, Reader r, List externalSchema) {
return parse(f, trimming, recordsTrimming, sourceName, f.parser(r), externalSchema);
}
private static CsvParsingResult parse(Separator format, Trimming trimming, RecordsTrimming recordsTrimming, String sourceName, CsvParserSpi parser, List externalSchema) {
final CsvParsingResult result = CsvParsingResult.withSchema(externalSchema);
for (;;) {
try {
final Record recordOrNull = parser.record(trimming);
if (recordOrNull == null) {
while (RecordsTrimming.EmptyLinesAtEnd == recordsTrimming && !result.records.isEmpty() && result.records.get(result.records.size() - 1).values().stream().allMatch(f -> f.isEmpty())) {
result.records.remove(result.records.size() - 1);
}
return result;
}
if (recordOrNull.values.size() != externalSchema.size()) {
result.problems.add(CsvParsingProblems.schemaViolation(sourceName, recordOrNull.source, recordOrNull.values.toString(), externalSchema));
continue;
}
final Map values = new ConcurrentHashMap<>();
for (int i = 0; i != recordOrNull.values.size(); i++) {
final String fieldName = externalSchema.get(i);
final String fieldValue = recordOrNull.values.get(i);
values.put(fieldName, fieldValue);
}
result.records.add(values);
} catch (ParseException ex) {
final Source s = Source.of(ex.currentToken.beginLine, ex.currentToken.beginColumn, ex.currentToken.endLine, ex.currentToken.endColumn);
final List> expectedOneOf = new ArrayList<>();
for (int[] expectedTokenSequence : ex.expectedTokenSequences) {
final List expected = new ArrayList<>();
for (int e : expectedTokenSequence) {
expected.add(format.tokenImage(e));
}
expectedOneOf.add(expected);
}
result.problems.add(CsvParsingProblems.unparseableRecord(sourceName, s, ex.currentToken.image, expectedOneOf));
try {
parser.skipToNextRecord();
} catch (ParseException neverHappens) {
}
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy