All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.univocity.parsers.common.AbstractParser Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2014 Univocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.common;

import com.univocity.parsers.common.input.EOFException;
import com.univocity.parsers.common.input.*;
import com.univocity.parsers.common.iterators.*;
import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.common.processor.core.*;
import com.univocity.parsers.common.record.*;

import java.io.*;
import java.nio.charset.*;
import java.util.*;

import static com.univocity.parsers.common.ArgumentUtils.*;

/**
 * The AbstractParser class provides a common ground for all parsers in univocity-parsers.
 *
 * <p>It handles all settings defined by {@link CommonParserSettings}, and delegates the parsing algorithm
 * implementation to its subclasses through the abstract method {@link AbstractParser#parseRecord()}.
 *
 * <p>The following (absolutely required) attributes are exposed to subclasses:
 * <ul>
 * <li><b>input ({@link CharInputReader}):</b> the character input provider that reads characters from a given input into an internal buffer</li>
 * <li><b>output ({@link ParserOutput}):</b> the output handler for every record parsed from the input. Implementors must use this object to handle the input (such as appending characters and notifying of values parsed)</li>
 * <li><b>ch (char):</b> the current character read from the input</li>
 * </ul>
* * @param The specific parser settings configuration class, which can potentially provide additional configuration options supported by the parser * implementation. * * @author Univocity Software Pty Ltd - [email protected] * @see com.univocity.parsers.csv.CsvParser * @see com.univocity.parsers.csv.CsvParserSettings * @see com.univocity.parsers.fixed.FixedWidthParser * @see com.univocity.parsers.fixed.FixedWidthParserSettings * @see com.univocity.parsers.common.input.CharInputReader * @see com.univocity.parsers.common.ParserOutput */ public abstract class AbstractParser> { protected final T settings; protected final ParserOutput output; private final long recordsToRead; protected final char comment; private final LineReader lineReader = new LineReader(); protected ParsingContext context; protected Processor processor; protected CharInputReader input; protected char ch; private final ProcessorErrorHandler errorHandler; private final long rowsToSkip; protected final Map comments; protected String lastComment; private final boolean collectComments; private final int errorContentLength; private boolean extractingHeaders = false; private final boolean extractHeaders; protected final int whitespaceRangeStart; protected boolean ignoreTrailingWhitespace; protected boolean ignoreLeadingWhitespace; private final boolean processComments; /** * All parsers must support, at the very least, the settings provided by {@link CommonParserSettings}. The AbstractParser requires its configuration to be * properly initialized. 
* * @param settings the parser configuration */ public AbstractParser(T settings) { settings.autoConfigure(); this.settings = settings; this.errorContentLength = settings.getErrorContentLength(); this.ignoreTrailingWhitespace = settings.getIgnoreTrailingWhitespaces(); this.ignoreLeadingWhitespace = settings.getIgnoreLeadingWhitespaces(); this.output = new ParserOutput(this, settings); this.processor = settings.getProcessor(); this.recordsToRead = settings.getNumberOfRecordsToRead(); this.comment = settings.getFormat().getComment(); this.errorHandler = settings.getProcessorErrorHandler(); this.rowsToSkip = settings.getNumberOfRowsToSkip(); this.collectComments = settings.isCommentCollectionEnabled(); this.comments = collectComments ? new TreeMap() : Collections.emptyMap(); this.extractHeaders = settings.isHeaderExtractionEnabled(); this.whitespaceRangeStart = settings.getWhitespaceRangeStart(); this.processComments = settings.isCommentProcessingEnabled(); } protected void processComment() { if (collectComments) { long line = input.lineCount(); String comment = input.readComment(); if (comment != null) { lastComment = comment; comments.put(line, lastComment); } } else { try { input.skipLines(1); } catch (IllegalArgumentException e) { //end of input reached, ignore. } } } /** * Parses the entirety of a given input and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param reader The input to be parsed. 
*/
    public final void parse(Reader reader) {
        beginParsing(reader);
        try {
            // Main loop: one iteration per comment or record, until the context is stopped.
            while (!context.isStopped()) {
                input.markRecordStart();
                ch = input.nextChar();
                if (processComments && inComment()) {
                    processComment();
                    continue;
                }
                // Only ask the subclass for a new record when no records are already pending in the output.
                if (output.pendingRecords.isEmpty()) {
                    parseRecord();
                }
                String[] row = output.rowParsed();
                if (row != null) {
                    // A negative recordsToRead never triggers this limit check.
                    if (recordsToRead >= 0 && context.currentRecord() >= recordsToRead) {
                        context.stop();
                        if (recordsToRead == 0) {
                            // Zero records requested: finish without handing this row to the processor.
                            stopParsing();
                            return;
                        }
                    }
                    if (processor != NoopProcessor.instance) {
                        rowProcessed(row);
                    }
                }
            }
            stopParsing();
        } catch (EOFException ex) {
            // End of input: flush what was being parsed, then drain any records still pending.
            try {
                handleEOF();
                while (!output.pendingRecords.isEmpty()) {
                    handleEOF();
                }
            } finally {
                stopParsing();
            }
        } catch (Throwable ex) {
            // handleException may itself throw; stopParsing(ex) always runs and rethrows the error it is given.
            try {
                ex = handleException(ex);
            } finally {
                stopParsing(ex);
            }
        }
    }

    /**
     * Parser-specific implementation for reading a single record from the input.
     *

 * <p>The AbstractParser handles the initialization and processing of the input until it is ready to be parsed.
 *
 * <p>It then delegates the input to the parser-specific implementation defined by {@link #parseRecord()}. In general, an implementation of
 * {@link AbstractParser#parseRecord()} will perform the following steps:
 * <ul>
 * <li>Test the character stored in <i>ch</i> and take some action on it (e.g. <i>while (ch != '\n'){doSomething()}</i>)</li>
 * <li>Request more characters by calling <i>ch = input.nextChar();</i></li>
 * <li>Append the desired characters to the output by executing, for example, <i>output.appender.append(ch)</i></li>
 * <li>Notify that a value of the record has been fully read by executing <i>output.valueParsed()</i>. This will clear the output appender ({@link CharAppender}) so the next call to output.appender.append(ch) will store the characters of the next parsed value</li>
 * <li>Rinse and repeat until all values of the record are parsed</li>
 * </ul>
 *
 * <p>Once the {@link #parseRecord()} returns, the AbstractParser takes over and handles the information (generally, reorganizing it and passing it on to a {@link RowProcessor}).
 *
 * <p>After the record processing, the AbstractParser reads the next characters from the input, delegating control again to the parseRecord() implementation for processing of the next record.
 *
 * <p>This cycle repeats until the reading process is stopped by the user, the input is exhausted, or an error happens.
 *

* <p>In case of errors, the unchecked exception {@link TextParsingException} will be thrown and all resources in use will be closed automatically
     * unless {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false}. The exception should contain the cause and more information about where in the input the error happened.
     *
     * @see com.univocity.parsers.common.input.CharInputReader
     * @see com.univocity.parsers.common.input.CharAppender
     * @see com.univocity.parsers.common.ParserOutput
     * @see com.univocity.parsers.common.TextParsingException
     * @see com.univocity.parsers.common.processor.RowProcessor
     */
    protected abstract void parseRecord();

    /**
     * Allows the parser implementation to handle any value that was being consumed when the end of the input was reached
     *
     * @return a flag indicating whether the parser was processing a value when the end of the input was reached.
     *         This default implementation always returns {@code false}.
     */
    protected boolean consumeValueOnEOF() {
        return false;
    }

    /**
     * Builds the last row (if any) once the end of the input is reached and hands it to the processor, unless the
     * processor is {@code NoopProcessor.instance}.
     *
     * @return the row produced from the remaining content or taken from the pending records, or {@code null} if there is none
     */
    private String[] handleEOF() {
        String[] row = null;
        try {
            boolean consumeValueOnEOF = consumeValueOnEOF();
            // Case 1: a record is partially built (some column already written), or the subclass reports a value in progress.
            if (output.column != 0 || (consumeValueOnEOF && !context.isStopped())) {
                if (output.appender.length() > 0 || consumeValueOnEOF) {
                    output.valueParsed();
                } else if (input.currentParsedContentLength() > 0) {
                    output.emptyParsed();
                }
                row = output.rowParsed();
            // Case 2: no column written yet, but characters were appended or consumed for this record.
            } else if (output.appender.length() > 0 || input.currentParsedContentLength() > 0) {
                if (output.appender.length() == 0) {
                    output.emptyParsed();
                } else {
                    output.valueParsed();
                }
                row = output.rowParsed();
            // Case 3: nothing in progress; take the next record still pending in the output.
            } else if (!output.pendingRecords.isEmpty()) {
                row = output.pendingRecords.poll();
            }
        } catch (Throwable e) {
            throw handleException(e);
        }
        if (row != null && processor != NoopProcessor.instance) {
            rowProcessed(row);
        }
        return row;
    }

    /**
     * Starts an iterator-style parsing cycle. If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing.
* The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param reader The input to be parsed. */ public final void beginParsing(Reader reader) { output.reset(); if (reader instanceof LineReader) { input = new DefaultCharInputReader(settings.getFormat().getLineSeparator(), settings.getFormat().getNormalizedNewline(), settings.getInputBufferSize(), whitespaceRangeStart, true); } else { input = settings.newCharInputReader(whitespaceRangeStart); } input.enableNormalizeLineEndings(true); context = createParsingContext(); if (processor instanceof DefaultConversionProcessor) { DefaultConversionProcessor conversionProcessor = ((DefaultConversionProcessor) processor); conversionProcessor.errorHandler = errorHandler; conversionProcessor.context = context; } if (input instanceof AbstractCharInputReader) { ((AbstractCharInputReader) input).addInputAnalysisProcess(getInputAnalysisProcess()); } try { input.start(reader); } catch (Throwable t) { throw handleException(t); } input.skipLines(rowsToSkip); initialize(); processor.processStarted(context); } void extractHeadersIfRequired() { while (extractHeaders && output.parsedHeaders == null && !context.isStopped() && !extractingHeaders) { Processor userProvidedProcessor = processor; try { processor = NoopProcessor.instance; //disables any users provided processors to capture headers extractingHeaders = true; parseNext(); } finally { extractingHeaders = false; processor = userProvidedProcessor; } } } protected ParsingContext createParsingContext() { DefaultParsingContext out = new DefaultParsingContext(this, errorContentLength); out.stopped = false; return out; } protected void initialize() { } /** * Allows the parser implementation to traverse the input buffer before the parsing process starts, in order to enable automatic configuration and discovery * of data formats. 
* * @return a custom implementation of {@link InputAnalysisProcess}. By default, {@code null} is returned and no special input analysis will be performed. */ protected InputAnalysisProcess getInputAnalysisProcess() { return null; } private String getParsedContent(CharSequence tmp) { return "Parsed content: " + AbstractException.restrictContent(errorContentLength, tmp); } private TextParsingException handleException(Throwable ex) { if (context != null) { context.stop(); } if (ex instanceof DataProcessingException) { DataProcessingException error = (DataProcessingException) ex; error.restrictContent(errorContentLength); error.setContext(this.context); throw error; } String message = ex.getClass().getName() + " - " + ex.getMessage(); char[] chars = output.appender.getChars(); if (chars != null) { int length = output.appender.length(); if (length > chars.length) { message = "Length of parsed input (" + length + ") exceeds the maximum number of characters defined in" + " your parser settings (" + settings.getMaxCharsPerColumn() + "). "; length = chars.length; } String tmp = new String(chars); if (tmp.contains("\n") || tmp.contains("\r")) { tmp = displayLineSeparators(tmp, true); String lineSeparator = displayLineSeparators(settings.getFormat().getLineSeparatorString(), false); message += "\nIdentified line separator characters in the parsed content. This may be the cause of the error. " + "The line separator in your parser settings is set to '" + lineSeparator + "'. " + getParsedContent(tmp); } int nullCharacterCount = 0; //ensuring the StringBuilder won't grow over Integer.MAX_VALUE to avoid OutOfMemoryError int maxLength = length > Integer.MAX_VALUE / 2 ? 
Integer.MAX_VALUE / 2 - 1 : length; StringBuilder s = new StringBuilder(maxLength); for (int i = 0; i < maxLength; i++) { if (chars[i] == '\0') { s.append('\\'); s.append('0'); nullCharacterCount++; } else { s.append(chars[i]); } } tmp = s.toString(); if (nullCharacterCount > 0) { message += "\nIdentified " + nullCharacterCount + " null characters ('\0') on parsed content. This may " + "indicate the data is corrupt or its encoding is invalid. Parsed content:\n\t" + getParsedContent(tmp); } } if (ex instanceof ArrayIndexOutOfBoundsException) { try { int index = Integer.parseInt(ex.getMessage()); if (index == settings.getMaxCharsPerColumn()) { message += "\nHint: Number of characters processed may have exceeded limit of " + index + " characters per column. Use settings.setMaxCharsPerColumn(int) to define the maximum number of characters a column can have"; } if (index == settings.getMaxColumns()) { message += "\nHint: Number of columns processed may have exceeded limit of " + index + " columns. Use settings.setMaxColumns(int) to define the maximum number of columns your input can have"; } message += "\nEnsure your configuration is correct, with delimiters, quotes and escape sequences that match the input format you are trying to parse"; } catch (Throwable t) { //ignore; } } try { if (!message.isEmpty()) { message += "\n"; } message += "Parser Configuration: " + settings.toString(); } catch (Exception t) { //ignore } if (errorContentLength == 0) { output.appender.reset(); } TextParsingException out = new TextParsingException(context, message, ex); out.setErrorContentLength(errorContentLength); return out; } /** * In case of errors, stops parsing and closes all open resources. Avoids hiding the original exception in case another error occurs when stopping. */ private void stopParsing(Throwable error) { if (error != null) { try { stopParsing(); } catch (Throwable ex) { // ignore and throw original error. 
} if (error instanceof DataProcessingException) { DataProcessingException ex = (DataProcessingException) error; ex.setContext(context); throw ex; } else if (error instanceof RuntimeException) { throw (RuntimeException) error; } else if (error instanceof Error) { throw (Error) error; } else { throw new IllegalStateException(error.getMessage(), error); } } else { stopParsing(); } } /** * Stops parsing and closes all open resources. */ public final void stopParsing() { try { ch = '\0'; try { if (context != null) { context.stop(); } } finally { try { if (processor != null) { processor.processEnded(context); } } finally { if (output != null) { output.appender.reset(); } if (input != null) { input.stop(); } } } } catch (Throwable error) { throw handleException(error); } } private List beginParseAll(boolean validateReader, Reader reader, int expectedRowCount) { if (reader == null) { if (validateReader) { throw new IllegalStateException("Input reader must not be null"); } else { if (context == null) { throw new IllegalStateException("Input not defined. Please call method 'beginParsing()' with a valid input."); } else if (context.isStopped()) { return Collections.emptyList(); } } } List out = new ArrayList(expectedRowCount <= 0 ? 10000 : expectedRowCount); if (reader != null) { beginParsing(reader); } return out; } /** * Parses all remaining rows from the input and returns them in a list. * * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all remaining records parsed from the input. */ public List parseAll(int expectedRowCount) { return internalParseAll(false, null, expectedRowCount); } /** * Parses all remaining rows from the input and returns them in a list. * * @return the list of all remaining records parsed from the input. 
*/
    public List parseAll() {
        final int noExpectedCount = -1;
        return internalParseAll(false, null, noExpectedCount);
    }

    /**
     * Reads every remaining {@link Record} of the input currently being parsed.
     *
     * @param expectedRowCount expected number of {@link Record}s to be parsed from the input.
     *                         Used to pre-allocate the size of the output {@link List}
     *
     * @return the list of all remaining records parsed from the input.
     */
    public List parseAllRecords(int expectedRowCount) {
        return internalParseAllRecords(false, null, expectedRowCount);
    }

    /**
     * Reads every remaining {@link Record} of the input currently being parsed.
     *
     * @return the list of all remaining {@link Record}s parsed from the input.
     */
    public List parseAllRecords() {
        final int noExpectedCount = -1;
        return internalParseAllRecords(false, null, noExpectedCount);
    }

    /**
     * Parses the whole content of the given reader and returns its rows in a list.
     *
     * @param reader the input to be parsed
     *
     * @return the list of all records parsed from the input.
     */
    public final List parseAll(Reader reader) {
        final int noExpectedCount = 0;
        return parseAll(reader, noExpectedCount);
    }

    /**
     * Parses the whole content of the given reader and returns its rows in a list.
     *
     * @param reader           the input to be parsed
     * @param expectedRowCount expected number of rows to be parsed from the input.
     *                         Used to pre-allocate the size of the output {@link List}
     *
     * @return the list of all records parsed from the input.
     */
    public final List parseAll(Reader reader, int expectedRowCount) {
        return internalParseAll(true, reader, expectedRowCount);
    }

    /**
     * Shared implementation of the {@code parseAll} variants: prepares the result list and fills it with every row
     * returned by {@link #parseNext()} until it returns {@code null}.
     */
    private final List internalParseAll(boolean validateReader, Reader reader, int expectedRowCount) {
        List rows = beginParseAll(validateReader, reader, expectedRowCount);
        for (String[] next = parseNext(); next != null; next = parseNext()) {
            rows.add(next);
        }
        return rows;
    }

    /**
     * Tells whether the current character is the comment character.
     *
     * @return {@code true} if {@code ch} equals the configured comment character
     */
    protected boolean inComment() {
        return comment == ch;
    }

    /**
     * Parses the next record from the input. Note that {@link AbstractParser#beginParsing(Reader)} must have been invoked once before calling this method.
     * If the end of the input is reached, then this method will return null.
* Additionally, all resources will be closed automatically at the end of the input
     * or if any error happens while parsing,
     * unless {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false}.
     *
     * @return The record parsed from the input or null if there's no more characters to read.
     *
     * @throws IllegalStateException if {@link AbstractParser#beginParsing(Reader)} was not invoked first, or if a
     *                               {@link NullPointerException} occurs while parsing
     */
    public final String[] parseNext() {
        try {
            while (!context.isStopped()) {
                input.markRecordStart();
                ch = input.nextChar();
                if (processComments && inComment()) {
                    processComment();
                    continue;
                }
                // Only ask the subclass for a new record when no records are already pending in the output.
                if (output.pendingRecords.isEmpty()) {
                    parseRecord();
                }
                String[] row = output.rowParsed();
                if (row != null) {
                    // A negative recordsToRead never triggers this limit check.
                    if (recordsToRead >= 0 && context.currentRecord() >= recordsToRead) {
                        context.stop();
                        if (recordsToRead == 0L) {
                            // Zero records requested: finish without returning or processing this row.
                            stopParsing();
                            return null;
                        }
                    }
                    if (processor != NoopProcessor.instance) {
                        rowProcessed(row);
                    }
                    return row;
                } else if (extractingHeaders) {
                    // During header extraction a single attempt is made per call.
                    return null;
                }
            }
            // Context already stopped: return a partially built row, if there is one, before closing.
            if (output.column != 0) {
                return output.rowParsed();
            }
            stopParsing();
            return null;
        } catch (EOFException ex) {
            String[] row = handleEOF();
            // Keep the parser open while records are still pending, so later calls can return them.
            if (output.pendingRecords.isEmpty()) {
                stopParsing();
            }
            return row;
        } catch (NullPointerException ex) {
            // A null context means parsing was never started.
            if (context == null) {
                throw new IllegalStateException("Cannot parse without invoking method beginParsing(Reader) first");
            } else {
                if (input != null) {
                    stopParsing();
                }
                throw new IllegalStateException("Error parsing next record.", ex);
            }
        } catch (Throwable ex) {
            // handleException may itself throw; stopParsing(ex) always runs and rethrows the error it is given.
            try {
                ex = handleException(ex);
            } finally {
                stopParsing(ex);
            }
        }
        return null;
    }

    /**
     * Reloads headers from settings.
     * The parsing context is also reset when it is a {@link DefaultParsingContext}.
     */
    protected final void reloadHeaders() {
        this.output.initializeHeaders();
        if (context instanceof DefaultParsingContext) {
            ((DefaultParsingContext) context).reset();
        }
    }

    /**
     * Parses a single line from a String in the format supported by the parser implementation.
* * @param line a line of text to be parsed * * @return the {@link Record} containing the values parsed from the input line */ public final Record parseRecord(String line) { String[] values = parseLine(line); if (values == null) { return null; } return context.toRecord(values); } /** * Parses a single line from a String in the format supported by the parser implementation. * * @param line a line of text to be parsed * * @return the values parsed from the input line */ public final String[] parseLine(String line) { if (line == null || line.isEmpty()) { return null; } lineReader.setLine(line); if (context == null || context.isStopped()) { beginParsing(lineReader); } else { if (input instanceof DefaultCharInputReader) { ((DefaultCharInputReader) input).reloadBuffer(); } else if (input instanceof LookaheadCharInputReader) { ((LookaheadCharInputReader) input).reloadBuffer(); } } try { while (!context.isStopped()) { input.markRecordStart(); ch = input.nextChar(); if (processComments && inComment()) { processComment(); return null; } if (output.pendingRecords.isEmpty()) { parseRecord(); } String[] row = output.rowParsed(); if (row != null) { if (processor != NoopProcessor.instance) { rowProcessed(row); } return row; } } return null; } catch (EOFException ex) { return handleEOF(); } catch (NullPointerException ex) { if (input != null) { stopParsing(null); } throw new IllegalStateException("Error parsing next record.", ex); } catch (Throwable ex) { try { ex = handleException(ex); } finally { stopParsing(ex); } } return null; } private void rowProcessed(String[] row) { Internal.process(row, processor, context, errorHandler); } /** * Parses the entirety of a given file and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param file The file to be parsed. 
*/ public final void parse(File file) { parse(ArgumentUtils.newReader(file)); } /** * Parses the entirety of a given file and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param file The file to be parsed. * @param encoding the encoding of the file */ public final void parse(File file, String encoding) { parse(ArgumentUtils.newReader(file, encoding)); } /** * Parses the entirety of a given file and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param file The file to be parsed. * @param encoding the encoding of the file */ public final void parse(File file, Charset encoding) { parse(ArgumentUtils.newReader(file, encoding)); } /** * Parses the entirety of a given input and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param input The input to be parsed. The input stream will be closed automatically, unless {@link CommonParserSettings#isAutoClosingEnabled()} evaluates * to {@code false}. */ public final void parse(InputStream input) { parse(ArgumentUtils.newReader(input)); } /** * Parses the entirety of a given input and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param input The input to be parsed. The input stream will be closed automatically, unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false}. * @param encoding the encoding of the input stream */ public final void parse(InputStream input, String encoding) { parse(ArgumentUtils.newReader(input, encoding)); } /** * Parses the entirety of a given input and delegates each parsed row to an instance of {@link RowProcessor}, defined by * {@link CommonParserSettings#getRowProcessor()}. * * @param input The input to be parsed. 
The input stream will be closed automatically, unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false}. * @param encoding the encoding of the input stream */ public final void parse(InputStream input, Charset encoding) { parse(ArgumentUtils.newReader(input, encoding)); } /** * Starts an iterator-style parsing cycle. If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing. * The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param file The file to be parsed. */ public final void beginParsing(File file) { beginParsing(ArgumentUtils.newReader(file)); } /** * Starts an iterator-style parsing cycle. If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing. * The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param file The file to be parsed. * @param encoding the encoding of the file */ public final void beginParsing(File file, String encoding) { beginParsing(ArgumentUtils.newReader(file, encoding)); } /** * Starts an iterator-style parsing cycle. If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing. * The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param file The file to be parsed. * @param encoding the encoding of the file */ public final void beginParsing(File file, Charset encoding) { beginParsing(ArgumentUtils.newReader(file, encoding)); } /** * Starts an iterator-style parsing cycle. 
If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing. * The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param input The input to be parsed. The input stream will be closed automatically in case of errors unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false}. */ public final void beginParsing(InputStream input) { beginParsing(ArgumentUtils.newReader(input)); } /** * Starts an iterator-style parsing cycle. If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing. * The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param input The input to be parsed. The input stream will be closed automatically in case of errors unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false}. * @param encoding the encoding of the input stream */ public final void beginParsing(InputStream input, String encoding) { beginParsing(ArgumentUtils.newReader(input, encoding)); } /** * Starts an iterator-style parsing cycle. If a {@link RowProcessor} is provided in the configuration, it will be used to perform additional processing. * The parsed records must be read one by one with the invocation of {@link AbstractParser#parseNext()}. * The user may invoke @link {@link AbstractParser#stopParsing()} to stop reading from the input. * * @param input The input to be parsed. The input stream will be closed automatically in case of errors unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false}. 
* @param encoding the encoding of the input stream */ public final void beginParsing(InputStream input, Charset encoding) { beginParsing(ArgumentUtils.newReader(input, encoding)); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the file. */ public final List parseAll(File file, int expectedRowCount) { return parseAll(ArgumentUtils.newReader(file), expectedRowCount); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * @param encoding the encoding of the file * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the file. */ public final List parseAll(File file, String encoding, int expectedRowCount) { return parseAll(ArgumentUtils.newReader(file, encoding), expectedRowCount); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * @param encoding the encoding of the file * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the file. */ public final List parseAll(File file, Charset encoding, int expectedRowCount) { return parseAll(ArgumentUtils.newReader(file, encoding), expectedRowCount); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false} * @param expectedRowCount expected number of rows to be parsed from the input. 
* Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAll(InputStream input, int expectedRowCount) { return parseAll(ArgumentUtils.newReader(input), expectedRowCount); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false} * @param encoding the encoding of the input stream * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAll(InputStream input, String encoding, int expectedRowCount) { return parseAll(ArgumentUtils.newReader(input, encoding), expectedRowCount); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false} * @param encoding the encoding of the input stream * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAll(InputStream input, Charset encoding, int expectedRowCount) { return parseAll(ArgumentUtils.newReader(input, encoding), expectedRowCount); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * * @return the list of all records parsed from the file. */ public final List parseAll(File file) { return parseAll(ArgumentUtils.newReader(file)); } /** * Parses all records from a file and returns them in a list. 
* * @param file the input file to be parsed * @param encoding the encoding of the file * * @return the list of all records parsed from the file. */ public final List parseAll(File file, String encoding) { return parseAll(ArgumentUtils.newReader(file, encoding)); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * @param encoding the encoding of the file * * @return the list of all records parsed from the file. */ public final List parseAll(File file, Charset encoding) { return parseAll(ArgumentUtils.newReader(file, encoding)); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false} * * @return the list of all records parsed from the input. */ public final List parseAll(InputStream input) { return parseAll(ArgumentUtils.newReader(input)); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false} * @param encoding the encoding of the input stream * * @return the list of all records parsed from the input. */ public final List parseAll(InputStream input, String encoding) { return parseAll(ArgumentUtils.newReader(input, encoding)); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false} * @param encoding the encoding of the input stream * * @return the list of all records parsed from the input. 
*/
    public final List parseAll(InputStream input, Charset encoding) {
        final Reader streamReader = ArgumentUtils.newReader(input, encoding);
        return parseAll(streamReader);
    }

    /**
     * Parses every record found in a file and returns them in a list.
     * The file is wrapped with {@code ArgumentUtils.newReader(file)} and handed to
     * {@link #parseAllRecords(Reader, int)}.
     *
     * @param file             the input file to be parsed
     * @param expectedRowCount expected number of rows to be parsed from the input.
     *                         Used to pre-allocate the size of the output {@link List}
     *
     * @return the list of all records parsed from the file.
     */
    public final List parseAllRecords(File file, int expectedRowCount) {
        final Reader fileReader = ArgumentUtils.newReader(file);
        return parseAllRecords(fileReader, expectedRowCount);
    }

    /**
     * Parses every record found in a file, read with the named encoding, and returns them in a list.
     *
     * @param file             the input file to be parsed
     * @param encoding         the encoding of the file
     * @param expectedRowCount expected number of rows to be parsed from the input.
     *                         Used to pre-allocate the size of the output {@link List}
     *
     * @return the list of all records parsed from the file.
     */
    public final List parseAllRecords(File file, String encoding, int expectedRowCount) {
        final Reader fileReader = ArgumentUtils.newReader(file, encoding);
        return parseAllRecords(fileReader, expectedRowCount);
    }

    /**
     * Parses every record found in a file, read with the given {@link Charset}, and returns them in a list.
     *
     * @param file             the input file to be parsed
     * @param encoding         the encoding of the file
     * @param expectedRowCount expected number of rows to be parsed from the input.
     *                         Used to pre-allocate the size of the output {@link List}
     *
     * @return the list of all records parsed from the file.
     */
    public final List parseAllRecords(File file, Charset encoding, int expectedRowCount) {
        final Reader fileReader = ArgumentUtils.newReader(file, encoding);
        return parseAllRecords(fileReader, expectedRowCount);
    }

    /**
     * Parses all records from an input stream and returns them in a list.
     *
     * @param input the input stream to be parsed. The input stream will be closed automatically unless
     * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false}
     * @param expectedRowCount expected number of rows to be parsed from the input.
* Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAllRecords(InputStream input, int expectedRowCount) { return parseAllRecords(ArgumentUtils.newReader(input), expectedRowCount); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false} * @param encoding the encoding of the input stream * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAllRecords(InputStream input, String encoding, int expectedRowCount) { return parseAllRecords(ArgumentUtils.newReader(input, encoding), expectedRowCount); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless * {@link CommonParserSettings#isAutoClosingEnabled()} evaluates to {@code false} * @param encoding the encoding of the input stream * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAllRecords(InputStream input, Charset encoding, int expectedRowCount) { return parseAllRecords(ArgumentUtils.newReader(input, encoding), expectedRowCount); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * * @return the list of all records parsed from the file. */ public final List parseAllRecords(File file) { return parseAllRecords(ArgumentUtils.newReader(file)); } /** * Parses all records from a file and returns them in a list. 
* * @param file the input file to be parsed * @param encoding the encoding of the file * * @return the list of all records parsed from the file. */ public final List parseAllRecords(File file, String encoding) { return parseAllRecords(ArgumentUtils.newReader(file, encoding)); } /** * Parses all records from a file and returns them in a list. * * @param file the input file to be parsed * @param encoding the encoding of the file * * @return the list of all records parsed from the file. */ public final List parseAllRecords(File file, Charset encoding) { return parseAllRecords(ArgumentUtils.newReader(file, encoding)); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false} * * @return the list of all records parsed from the input. */ public final List parseAllRecords(InputStream input) { return parseAllRecords(ArgumentUtils.newReader(input)); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false} * @param encoding the encoding of the input stream * * @return the list of all records parsed from the input. */ public final List parseAllRecords(InputStream input, String encoding) { return parseAllRecords(ArgumentUtils.newReader(input, encoding)); } /** * Parses all records from an input stream and returns them in a list. * * @param input the input stream to be parsed. The input stream will be closed automatically unless {@link CommonParserSettings#isAutoClosingEnabled()} * evaluates to {@code false} * @param encoding the encoding of the input stream * * @return the list of all records parsed from the input. 
*/ public final List parseAllRecords(InputStream input, Charset encoding) { return parseAllRecords(ArgumentUtils.newReader(input, encoding)); } /** * Parses all records from the input and returns them in a list. * * @param reader the input to be parsed * @param expectedRowCount expected number of rows to be parsed from the input. * Used to pre-allocate the size of the output {@link List} * * @return the list of all records parsed from the input. */ public final List parseAllRecords(Reader reader, int expectedRowCount) { return internalParseAllRecords(true, reader, expectedRowCount); } private List internalParseAllRecords(boolean validateReader, Reader reader, int expectedRowCount) { List out = beginParseAll(validateReader, reader, expectedRowCount); if (context.isStopped()) { return out; } Record record; while ((record = parseNextRecord()) != null) { out.add(record); } return out; } /** * Parses all records from the input and returns them in a list. * * @param reader the input to be parsed * * @return the list of all records parsed from the input. */ public final List parseAllRecords(Reader reader) { return parseAllRecords(reader, 0); } /** * Parses the next record from the input. Note that {@link AbstractParser#beginParsing(Reader)} must have been invoked once before calling this method. * If the end of the input is reached, then this method will return null. Additionally, all resources will be closed automatically at the end of the input * or if any error happens while parsing. * * @return The record parsed from the input or null if there's no more characters to read. */ public final Record parseNextRecord() { String[] row = this.parseNext(); if (row != null) { return context.toRecord(row); } return null; } /** * Returns all comments collected by the parser so far. * An empty map will be returned if {@link CommonParserSettings#isCommentCollectionEnabled()} evaluates to {@code false}. * * @return a map containing the line numbers and comments found in each. 
*/ final Map getComments() { return comments; } /** * Returns the last comment found in the input. * {@code null} will be returned if {@link CommonParserSettings#isCommentCollectionEnabled()} is evaluated to {@code false}. * * @return the last comment found in the input. */ final String getLastComment() { return lastComment; } /** * Returns the headers parsed from the input, if and only if {@link CommonParserSettings#headerExtractionEnabled} is {@code true}. * The result of this method won't return the list of headers manually set by the user in {@link CommonParserSettings#getHeaders()}. * * @return the headers parsed from the input, when {@link CommonParserSettings#headerExtractionEnabled} is {@code true}. */ final String[] getParsedHeaders() { extractHeadersIfRequired(); return output.parsedHeaders; } /** * Returns the current parsing context with information about the status of the parser at any given time. * * @return the parsing context */ public final ParsingContext getContext() { return context; } /** * Returns the metadata associated with {@link Record}s parsed from the input using {@link AbstractParser#parseAllRecords(File)} or * {@link AbstractParser#parseNextRecord()}. * * @return the metadata of {@link Record}s generated with the current input. */ public final RecordMetaData getRecordMetadata() { if (context == null) { throw new IllegalStateException("Record metadata not available. The parser has not been started."); } return context.recordMetaData(); } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. * * @param input the input {@code File} * @param encoding the encoding of the input {@code File} * * @return an iterator for rows parsed from the input. */ public final IterableResult iterate(final File input, String encoding) { return iterate(input, Charset.forName(encoding)); } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. 
* * @param input the input {@code File} * @param encoding the encoding of the input {@code File} * * @return an iterator for rows parsed from the input. */ public final IterableResult iterate(final File input, final Charset encoding) { return new RowIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input, encoding); } }; } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. * * @param input the input {@code File} * * @return an iterator for rows parsed from the input. */ public final IterableResult iterate(final File input) { return new RowIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input); } }; } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. * * @param input the input {@code Reader} * * @return an {@code iterable} over the results of parsing the {@code Reader} */ public final IterableResult iterate(final Reader input) { return new RowIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input); } }; } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. * * @param input the the {@code InputStream} with contents to be parsed * @param encoding the character encoding to be used for processing the given input. * * @return an iterator for rows parsed from the input. */ public final IterableResult iterate(final InputStream input, String encoding) { return iterate(input, Charset.forName(encoding)); } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. * * @param input the the {@code InputStream} with contents to be parsed * @param encoding the character encoding to be used for processing the given input. * * @return an iterator for rows parsed from the input. 
*/ public final IterableResult iterate(final InputStream input, final Charset encoding) { return new RowIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input, encoding); } }; } /** * Provides an {@link IterableResult} for iterating rows parsed from the input. * * @param input the the {@code InputStream} with contents to be parsed * * @return an iterator for rows parsed from the input. */ public final IterableResult iterate(final InputStream input) { return new RowIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input); } }; } /** * Provides an {@link IterableResult} for iterating records parsed from the input. * * @param input the input {@code File} * @param encoding the encoding of the input {@code File} * * @return an iterator for records parsed from the input. */ public final IterableResult iterateRecords(final File input, String encoding) { return iterateRecords(input, Charset.forName(encoding)); } /** * Provides an {@link IterableResult} for iterating records parsed from the input. * * @param input the input {@code File} * @param encoding the encoding of the input {@code File} * * @return an iterator for records parsed from the input. */ public final IterableResult iterateRecords(final File input, final Charset encoding) { return new RecordIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input, encoding); } }; } /** * Provides an {@link IterableResult} for iterating records parsed from the input. * * @param input the input {@code File} * * @return an iterator for records parsed from the input. */ public final IterableResult iterateRecords(final File input) { return new RecordIterator(this) { @Override protected void beginParsing() { parser.beginParsing(input); } }; } /** * Provides an {@link IterableResult} for iterating records parsed from the input. * * @param input the input {@code Reader} * * @return an iterator for records parsed from the input. 
*/
    public final IterableResult iterateRecords(final Reader input) {
        // The iterator's beginParsing() hook forwards to parser.beginParsing(Reader).
        final IterableResult records = new RecordIterator(this) {
            @Override
            protected void beginParsing() {
                parser.beginParsing(input);
            }
        };
        return records;
    }

    /**
     * Provides an {@link IterableResult} for iterating records parsed from the input.
     * The encoding name is resolved with {@link Charset#forName(String)} and the call is forwarded to
     * {@link #iterateRecords(InputStream, Charset)}.
     *
     * @param input    the {@code InputStream} with contents to be parsed
     * @param encoding the character encoding to be used for processing the given input.
     *
     * @return an iterator for records parsed from the input.
     */
    public final IterableResult iterateRecords(final InputStream input, String encoding) {
        final Charset charset = Charset.forName(encoding);
        return iterateRecords(input, charset);
    }

    /**
     * Provides an {@link IterableResult} for iterating records parsed from the input.
     *
     * @param input    the {@code InputStream} with contents to be parsed
     * @param encoding the character encoding to be used for processing the given input.
     *
     * @return an iterator for records parsed from the input.
     */
    public final IterableResult iterateRecords(final InputStream input, final Charset encoding) {
        // The iterator's beginParsing() hook forwards to parser.beginParsing(InputStream, Charset).
        final IterableResult records = new RecordIterator(this) {
            @Override
            protected void beginParsing() {
                parser.beginParsing(input, encoding);
            }
        };
        return records;
    }

    /**
     * Provides an {@link IterableResult} for iterating records parsed from the input.
     *
     * @param input the {@code InputStream} with contents to be parsed
     *
     * @return an iterator for records parsed from the input.
     */
    public final IterableResult iterateRecords(final InputStream input) {
        // The iterator's beginParsing() hook forwards to parser.beginParsing(InputStream).
        final IterableResult records = new RecordIterator(this) {
            @Override
            protected void beginParsing() {
                parser.beginParsing(input);
            }
        };
        return records;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy