All downloads are free. Search and download functionality uses the official Maven repository.

org.jsapar.parse.csv.CsvParser Maven / Gradle / Ivy

Go to download

JSaPar is a Java library providing a schema-based parser and composer of almost all sorts of delimited and fixed-width files.

The newest version!
package org.jsapar.parse.csv;

import org.jsapar.error.JSaParException;
import org.jsapar.model.Line;
import org.jsapar.parse.line.ValidationHandler;
import org.jsapar.parse.text.TextSchemaParser;
import org.jsapar.schema.CsvSchema;
import org.jsapar.schema.SchemaLine;
import org.jsapar.text.TextParseConfig;

import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Internal class for parsing CSV input.
 * <p>
 * Lines are read from a {@link CsvLineReader} and handed to whichever {@link CsvLineParser} the
 * {@link CsvLineParserFactory} supplies for the current position. Input lines for which the factory
 * supplies no parser are reported through the {@link ValidationHandler} and skipped.
 * <p>
 * NOTE(review): {@code Consumer}, {@code Stream} and {@code Spliterator} are used without type
 * arguments in this file; presumably they are parameterized with {@code Line} and
 * {@code JSaParException} in {@link TextSchemaParser} - confirm before tightening the signatures.
 */
public class CsvParser implements TextSchemaParser {
    private final CsvLineReader lineReader;
    private final CsvSchema schema;
    private final CsvLineParserFactory lineParserFactory;
    private final TextParseConfig parseConfig;
    private final ValidationHandler validationHandler = new ValidationHandler();

    /**
     * Creates a parser that uses a default-constructed {@link TextParseConfig}.
     *
     * @param reader The reader to read CSV input from.
     * @param schema The schema describing the input.
     */
    CsvParser(Reader reader, CsvSchema schema) {
        this(reader, schema, new TextParseConfig());
    }

    /**
     * Creates a parser for the supplied reader, schema and configuration.
     *
     * @param reader      The reader to read CSV input from.
     * @param schema      The schema describing the input.
     * @param parseConfig The configuration to use while parsing.
     */
    public CsvParser(Reader reader, CsvSchema schema, TextParseConfig parseConfig) {
        this.parseConfig = parseConfig;
        lineReader = new CsvLineReaderStates(
                schema.getLineSeparator(),
                reader,
                schema.stream().anyMatch(SchemaLine::isOccursInfinitely),
                parseConfig.getMaxLineLength(),
                schema.getQuoteSyntax());
        this.schema = schema;
        this.lineParserFactory = new CsvLineParserFactory(schema, parseConfig);
    }

    /**
     * Parses the input, passing the result of each line to {@code listener}.
     *
     * @param listener      Receives what the line parsers produce.
     * @param errorListener Receives errors reported while parsing.
     * @return The number of lines handed to a line parser for which the reader did not report an
     * empty line. Always 0 if the schema is empty.
     * @throws IOException If reading the input fails.
     */
    @Override
    public long parse(Consumer listener, Consumer errorListener) throws IOException {
        if (schema.isEmpty()) {
            return 0;
        }
        long lineNumber = 0;
        CsvLineParser lineParser;
        while ((lineParser = nextLineParser(errorListener)) != null) {
            if (!lineParser.parse(lineReader, listener, errorListener)) {
                return lineNumber;
            }
            if (!lineReader.lastLineWasEmpty()) {
                lineNumber++;
            }
        }
        // No more parsers or end of input. We should not read anymore. Leave rest of input as is.
        return lineNumber;
    }

    /**
     * Creates a sequential, lazily evaluated stream over the parsed input.
     * <p>
     * Nothing is read when this method is called; the input is consumed while the returned stream is
     * traversed. An {@link IOException} raised during traversal is therefore thrown from the terminal
     * stream operation wrapped in an {@link UncheckedIOException}, not from this method.
     *
     * @param errorListener Receives errors reported while parsing.
     * @return A stream of what the line parsers produce, or an empty stream if the schema is empty.
     * @throws IOException Declared for compatibility with existing callers; this implementation does
     *                     not read any input before returning.
     */
    public Stream stream(Consumer errorListener) throws IOException {
        if (schema.isEmpty()) {
            return Stream.empty();
        }
        Spliterator spliterator = new Spliterator<>() {
            @Override
            public boolean tryAdvance(Consumer action) {
                try {
                    // Undefined lines are skipped inside nextLineParser. Returning true for a skipped
                    // line without invoking the action would break the tryAdvance contract and make
                    // iterator based consumers stop early.
                    CsvLineParser lineParser = nextLineParser(errorListener);
                    if (lineParser == null) {
                        return false;
                    }
                    // NOTE(review): assumes parse returns true only when it has passed a line to
                    // action - confirm how empty lines are reported by CsvLineParser.
                    return lineParser.parse(lineReader, action, errorListener);
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }

            @Override
            public Spliterator trySplit() {
                // The underlying reader is consumed sequentially, so the source is never split.
                return null;
            }

            @Override
            public long estimateSize() {
                // The number of remaining lines is unknown.
                return Long.MAX_VALUE;
            }

            @Override
            public int characteristics() {
                return IMMUTABLE | ORDERED | NONNULL;
            }
        };

        return StreamSupport.stream(spliterator, false);
    }

    /**
     * Finds the parser for the next line, reporting and skipping every line for which the factory
     * supplies no parser.
     *
     * @param errorListener Receives errors reported for skipped lines.
     * @return The parser to use for the next line, or {@code null} when parsing should stop because
     * the factory has no more parsers or the end of the input has been reached.
     * @throws IOException If reading the input fails.
     */
    private CsvLineParser nextLineParser(Consumer errorListener) throws IOException {
        while (true) {
            CsvLineParser lineParser = lineParserFactory.makeLineParser(lineReader);
            if (lineParser != null) {
                return lineParser;
            }
            if (lineParserFactory.isEmpty() || lineReader.eofReached()) {
                return null;
            }
            handleNoParser(lineReader, errorListener);
        }
    }

    /**
     * Handles a line for which no line parser was available. Does nothing if the reader reports that
     * the last line was empty; otherwise the line is reported according to
     * {@link TextParseConfig#getOnUndefinedLineType()} and then skipped.
     *
     * @param lineReader         The reader positioned at the line that could not be parsed.
     * @param errorEventListener Receives the error if the configured action reports one.
     * @throws IOException If skipping the line fails.
     */
    private void handleNoParser(CsvLineReader lineReader, Consumer errorEventListener)
            throws IOException {
        if (lineReader.lastLineWasEmpty())
            return;
        validationHandler.lineValidation(
                lineReader.currentLineNumber(), parseConfig.getOnUndefinedLineType(), errorEventListener,
                ()->"No schema line could be used to parse line number " + lineReader.currentLineNumber());
        lineReader.skipLine();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy