All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nerd4j.csv.parser.CSVParserImpl Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * #%L
 * Nerd4j CSV
 * %%
 * Copyright (C) 2013 Nerd4j
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as 
 * published by the Free Software Foundation, either version 3 of the 
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-3.0.html>.
 * #L%
 */
package org.nerd4j.csv.parser;

import java.io.IOException;
import java.io.Reader;
import java.util.LinkedList;
import java.util.Queue;

import org.nerd4j.csv.RemarkableASCII;
import org.nerd4j.csv.exception.MalformedCSVException;
import org.nerd4j.csv.parser.CSVParserFactory.CharacterClass;

/**
 * CSVParser reads character data from a {@link Reader}, parsing and
 * tokenizing them into {@link CSVToken}.
 * 
 * 

* After usage it should be closed to permit resource release. *

* * @author Nerd4j Team */ final class CSVParserImpl implements CSVParser { /** Size of {@link #buffer reading char buffer}, 8MB. */ private static final int BUFFER_SIZE = 1024 * 1024 * 8; /** {@link CharacterClass} mappings from {@link #characterConfiguration}. */ private final int[] types; /** * Accept more lazy quotes: in unquoted fields ({@link FieldState#NORMAL} * and {@link FieldState#NORMAL_END}) are handled as * {@link CharacterClass#NORMAL} without beeing escaped */ private final boolean lazyQuotes; /** CSV data source reader. */ private final Reader reader; /** Shared field builder. */ private final FieldBuilder builder; /** Buffer from which read characters. Populated from {@link #reader} */ private final char[] buffer; /** Next character to be read from the {@link #buffer reading char buffer}. */ private int bufferIndex; /** * Number of characters currently hold by the {@link #buffer reading char * buffer}. Normally equals to {@link #BUFFER_SIZE} but the last buffer read * before completely emptying the {@link #reader}. */ private int bufferElements; /** Last read field termination reason. */ private FieldEndReason previousFieldEndReason; /** Current read token. */ private CSVToken token; /** * Current token value, if any. As string to avoid to invoke * {@link FieldBuilder#toString()} multiple times. */ private String value; /** Counts the actually read characters. */ private int charCount; /** * Next read tokens, if any. Needed when a field terminates with a record * separator or a end of data. */ private Queue nexts; /* ******************** */ /* *** CONSTRUCTORS *** */ /* ******************** */ /** * Create a new {@link CSVParserImpl} with the given character configuration. * * @param reader csv data source. * @param characterConfiguration character classes configuration. 
*/ CSVParserImpl( final Reader reader, final int[] types, boolean lazyQuotes ) { this.reader = reader; this.types = types; this.lazyQuotes = lazyQuotes ; this.nexts = new LinkedList(); this.bufferElements = 0; this.bufferIndex = BUFFER_SIZE; this.buffer = new char[ BUFFER_SIZE ]; this.token = null; this.value = null; this.charCount = 0; this.builder = new FieldBuilderImpl( 1024 ); this.previousFieldEndReason = FieldEndReason.UNKNOWN; } /* ************************ */ /* *** PUBLIC INTERFACE *** */ /* ************************ */ /** * {@inheritDoc} */ @Override public CSVToken getCurrentToken() { return token; } /** * {@inheritDoc} */ @Override public String getCurrentValue() { return value; } /** * {@inheritDoc} */ @Override public CSVToken read() throws IOException { /* Evaluate and return current token. */ return token = readField( true ); } /** * {@inheritDoc} */ @Override public CSVToken skip() throws IOException { /* Evaluate and return current token (dummy field builder). */ return token = readField( false ); } /** * {@inheritDoc} */ @Override public void close() throws IOException { reader.close(); token = null; nexts.clear(); nexts = null; value = null; } /* ****************************** */ /* *** PRIVATE IMPLEMENTATION *** */ /* ****************************** */ /** * Enumerates all possible field parsing state (state machine). Previously * was an enum but we discovered that switching on constants is really * faster. */ private interface FieldState { /** Starting state, only to be ignore chars has been read. */ public static final int INITIAL = 0 << 15; /** Reading a not quoted field. */ public static final int NORMAL = 1 << 15; /** Reading escape on a not quoted field. */ public static final int NORMAL_ESCAPE = 2 << 15; /** Trying to read a not quoted field termination. */ public static final int NORMAL_END = 3 << 15; /** Reading a quoted field. */ public static final int QUOTED = 4 << 15; /** Reading escape on a quoted field. 
*/ public static final int QUOTED_ESCAPE = 5 << 15; /** Trying to read a quoted field termination. */ public static final int QUOTED_END = 6 << 15; /** Reading double quote on a quoted field. */ public static final int DOUBLE_QUOTE = 7 << 15; } /** * Enumerates end of field reasons (all possible ways for a field to * terminate). */ private enum FieldEndReason { /** Before ending to read any field the end reason in unknown. */ UNKNOWN, /** Found a field separator: {@link CharacterClass#FIELD_SEPARATOR}. */ FIELD_SEPARATOR, /** * Found a row separator {@link CharacterClass#RECORD_SEPARATOR}, * {@link CharacterClass#RECORD_SEPARATOR_1}, * {@link CharacterClass#RECORD_SEPARATOR_2}. */ RECORD_SEPARATOR, /** Reached data reader end. */ DATA_END; } /** * Read the next {@link CSVToken} and returns it. *

* Given {@link FieldBuilder} will be used as support while reading * data and cleaned ({@link FieldBuilder#clear()}) before returning. *

* * @param read tells if to actually read the field or to skip it. * @return next read {@link CSVToken}. * @throws IOException if an error occurs while parsing data. */ private CSVToken readField( final boolean read ) throws IOException { /* Check if already reached data end. */ if( token == CSVToken.END_OF_DATA ) return CSVToken.END_OF_DATA; /* Clear previous iteration data. */ value = null; charCount = 0; /* Currently evaluated token. */ CSVToken token; /* Check if there is a next token to returns instead of reading another one. */ if( ! nexts.isEmpty() ) { /* Next can't be a field. */ token = nexts.poll(); return token; } /* Parse a new field and get the reason why the read ended. */ final FieldEndReason currentFieldEndReason = parseField( read ); /* Setup read value (if any) and clear field builder. */ if( read ) { value = builder.toString(); builder.clear(); } /* Manipulate FieldEndReason to generate CSVTokens. */ switch ( currentFieldEndReason ) { /* The end of the stream has been read. */ case DATA_END: switch( previousFieldEndReason ) { /* * If the previous end reason in unknown it means that * the CSV source is empty. */ case UNKNOWN: token = CSVToken.END_OF_DATA; break; /* * If the previous read ended with a record separator * we can have two situations: * * 1. The standard case of a CSV source that ends after * the last record has ended properly. * * 2. The case of a single column CSV where the last * row ends without a proper record separator. */ case RECORD_SEPARATOR: /* * In the case of a standard well formed CSV (first case) * the last value read is empty. In this case we just * notify an END_OF_DATA. */ if( charCount < 2 ) { token = CSVToken.END_OF_DATA; break; } /* * In the case of a single column CSV that ends * in a non standard way we behave in the same * way as when we read a FIELD_SEPARATOR. 
*/ case FIELD_SEPARATOR: /* * In this case we can end with an empty * field or with a valued field, in both * cases we notify that we have read a * FIELD than we and the reading. */ token = CSVToken.FIELD; nexts.add( CSVToken.END_OF_RECORD ); nexts.add( CSVToken.END_OF_DATA ); break; /* * This case can never be reached due to the * check at the beginning of the method. * But just in case we behave as expected. */ case DATA_END: token = CSVToken.END_OF_DATA; break; default: throw new IllegalStateException( "Unknown reason: " + currentFieldEndReason + ". This is a bug evidence." ); } break; /* * A record separator has been read. * In this case we always consider to read * at least one FIELD as well. Even if the * RECORD_SEPARATOR is at the beginning of * the CSV source it means that we read an * empty field. */ case RECORD_SEPARATOR: /* * In this case we store the fact that * an END_OF_RECORD has been read for * future use and behave in the same * way as when we read a FIELD_SEPARATOR. */ nexts.add( CSVToken.END_OF_RECORD ); /* A field separator has been read. */ case FIELD_SEPARATOR: /* * In this case we just notify that * a FIELD has been read. */ token = CSVToken.FIELD; break; default: throw new IllegalStateException( "Unknown reason: " + currentFieldEndReason + ". This is a bug evidence." ); } /* Save current stop reason for the next iteration. */ previousFieldEndReason = currentFieldEndReason; /* Return evaluated token. */ return token; } /** * Read and parse a field from {@link #reader} and write it on a * {@link FieldBuilder}. * * @param read tells if to actually read the field or to skip it. * @return reason of field end. * * @throws IOException if an error occurs while reading characters. */ private FieldEndReason parseField( final boolean read ) throws IOException { /* * Note that using only ASCII special character no special character is * outside Unicode BMP (thus is only 2 byte long). 
* * Characters outside BMP will be handled as non special chars and * simply written (so a multi-char character won't be corrupted). */ /* Read character as int. */ char current; /* Read character type. */ int type; /* Current field state. */ int state = FieldState.INITIAL; /* Loop on characters until an exit case is found. */ while ( true ) { /* * Check if buffer need to be refilled. Checking on BUFFER_SIZE * instead of bufferElements because is faster on a constant value * AND if bufferElements is less than BUFFER_SIZE it can't be * refilled (data already completely read into buffer) */ if ( bufferIndex >= BUFFER_SIZE ) { bufferElements = reader.read( buffer, 0 , BUFFER_SIZE ); bufferIndex = 0; } if( bufferElements == -1 || bufferIndex >= bufferElements ) /* Got an end of data. */ switch ( state ) { case FieldState.NORMAL_END: case FieldState.QUOTED_END: if( read ) builder.rollbackToMark(); case FieldState.NORMAL: case FieldState.INITIAL: case FieldState.DOUBLE_QUOTE: /* Normal field termination. */ return FieldEndReason.DATA_END; case FieldState.NORMAL_ESCAPE: /* Current field has an unterminated escape. */ throw new MalformedCSVException( "Solitary escape at end of data." ); case FieldState.QUOTED: /* Current field has an unclosed quote. */ throw new MalformedCSVException( "Unclosed quoted field at end of data." ); case FieldState.QUOTED_ESCAPE: /* Current field has an unclosed quote and an unterminated escape. */ throw new MalformedCSVException( "Unclosed quoted field and solitary escape at end of data." ); default: throw new IllegalStateException( "Unknown state: " + state + ". This is a bug evidence." ); } /* Read current character. */ current = buffer[ bufferIndex++ ]; /* Find the current character class. */ if( current < RemarkableASCII.ASCII_TABLE_SIZE ) type = types[ current ]; else type = CharacterClass.NORMAL; /* * We count also the characters to ignore * to know if the field is actually empty. 
*/ ++ charCount; /* Handle current character depending on his class. */ switch ( state ^ type ) { /* ************************** */ /* *** FieldState.INITIAL *** */ /* ************************** */ case FieldState.INITIAL ^ CharacterClass.NORMAL: if( read ) builder.append( current ); state = FieldState.NORMAL; break; case FieldState.INITIAL ^ CharacterClass.TO_IGNORE: break; case FieldState.INITIAL ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: break; case FieldState.INITIAL ^ CharacterClass.QUOTE: state = FieldState.QUOTED; break; case FieldState.INITIAL ^ CharacterClass.ESCAPE: state = FieldState.NORMAL_ESCAPE; break; case FieldState.INITIAL ^ CharacterClass.FIELD_SEPARATOR: return FieldEndReason.FIELD_SEPARATOR; case FieldState.INITIAL ^ CharacterClass.RECORD_SEPARATOR: return FieldEndReason.RECORD_SEPARATOR; case FieldState.INITIAL ^ CharacterClass.RECORD_SEPARATOR_1: if ( checkRecordSequence( current ) ) return FieldEndReason.RECORD_SEPARATOR; /* Handle as CharacterClass.NORMAL */ if( read ) builder.append( current ); state = FieldState.NORMAL; break; case FieldState.INITIAL ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ if( read ) builder.append( current ); state = FieldState.NORMAL; break; /* ************************* */ /* *** FieldState.NORMAL *** */ /* ************************* */ case FieldState.NORMAL ^ CharacterClass.NORMAL: if( read ) builder.append( current ); break; case FieldState.NORMAL ^ CharacterClass.TO_IGNORE: break; case FieldState.NORMAL ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: state = FieldState.NORMAL_END; if( read ) { builder.mark(); builder.append( current ); } break; case FieldState.NORMAL ^ CharacterClass.QUOTE: /* Current field is unquoted but contains a not escaped quote. */ /* If quotes have to be handled less strictly */ if ( lazyQuotes ) { /* Handle as CharacterClass.NORMAL */ if( read ) builder.append( current ); break; } throw new MalformedCSVException( "Encountered an unescaped quote in a unquoted field." 
); case FieldState.NORMAL ^ CharacterClass.ESCAPE: state = FieldState.NORMAL_ESCAPE; break; case FieldState.NORMAL ^ CharacterClass.FIELD_SEPARATOR: return FieldEndReason.FIELD_SEPARATOR; case FieldState.NORMAL ^ CharacterClass.RECORD_SEPARATOR: return FieldEndReason.RECORD_SEPARATOR; case FieldState.NORMAL ^ CharacterClass.RECORD_SEPARATOR_1: if ( checkRecordSequence( current ) ) return FieldEndReason.RECORD_SEPARATOR; /* Handle as CharacterClass.NORMAL */ if( read ) builder.append( current ); break; case FieldState.NORMAL ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ if( read ) builder.append( current ); break; /* ******************************** */ /* *** FieldState.NORMAL_ESCAPE *** */ /* ******************************** */ case FieldState.NORMAL_ESCAPE ^ CharacterClass.NORMAL: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.TO_IGNORE: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.QUOTE: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.ESCAPE: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.FIELD_SEPARATOR: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.RECORD_SEPARATOR: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.RECORD_SEPARATOR_1: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_ESCAPE ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ state = FieldState.NORMAL; if( read ) builder.append( current 
); break; /* ***************************** */ /* *** FieldState.NORMAL_END *** */ /* ***************************** */ case FieldState.NORMAL_END ^ CharacterClass.NORMAL: state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_END ^ CharacterClass.TO_IGNORE: break; case FieldState.NORMAL_END ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: if( read ) { builder.mark(); builder.append( current ); } break; case FieldState.NORMAL_END ^ CharacterClass.QUOTE: /* Current field is unquoted but contains a not escaped quote. */ /* If quotes have to be handled less strictly */ if ( lazyQuotes ) { /* Handle as CharacterClass.NORMAL */ state = FieldState.NORMAL; if( read ) builder.append( current ); break; } if( read ) builder.rollbackToMark(); throw new MalformedCSVException( "Encountered an unescaped quote in a unquoted field." ); case FieldState.NORMAL_END ^ CharacterClass.ESCAPE: state = FieldState.NORMAL_ESCAPE; break; case FieldState.NORMAL_END ^ CharacterClass.FIELD_SEPARATOR: if( read ) builder.rollbackToMark(); return FieldEndReason.FIELD_SEPARATOR; case FieldState.NORMAL_END ^ CharacterClass.RECORD_SEPARATOR: if( read ) builder.rollbackToMark(); return FieldEndReason.RECORD_SEPARATOR; case FieldState.NORMAL_END ^ CharacterClass.RECORD_SEPARATOR_1: if ( checkRecordSequence( current ) ) return FieldEndReason.RECORD_SEPARATOR; /* Handle as CharacterClass.NORMAL */ state = FieldState.NORMAL; if( read ) builder.append( current ); break; case FieldState.NORMAL_END ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ state = FieldState.NORMAL; if( read ) builder.append( current ); break; /* ************************* */ /* *** FieldState.QUOTED *** */ /* ************************* */ case FieldState.QUOTED ^ CharacterClass.NORMAL: if( read ) builder.append( current ); break; case FieldState.QUOTED ^ CharacterClass.TO_IGNORE: break; case FieldState.QUOTED ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: if( read ) builder.append( 
current ); break; case FieldState.QUOTED ^ CharacterClass.QUOTE: state = FieldState.DOUBLE_QUOTE; break; case FieldState.QUOTED ^ CharacterClass.ESCAPE: state = FieldState.QUOTED_ESCAPE; break; case FieldState.QUOTED ^ CharacterClass.FIELD_SEPARATOR: if( read ) builder.append( current ); break; /* * character is considered ad RECORD_SEPARATOR_1 */ case FieldState.QUOTED ^ CharacterClass.RECORD_SEPARATOR: if( read ) builder.append( current ); break; case FieldState.QUOTED ^ CharacterClass.RECORD_SEPARATOR_1: if( read ) builder.append( current ); break; case FieldState.QUOTED ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ if( read ) builder.append( current ); break; /* ******************************** */ /* *** FieldState.QUOTED_ESCAPE *** */ /* ******************************** */ case FieldState.QUOTED_ESCAPE ^ CharacterClass.NORMAL: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.TO_IGNORE: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.QUOTE: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.ESCAPE: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.FIELD_SEPARATOR: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.RECORD_SEPARATOR: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.RECORD_SEPARATOR_1: state = FieldState.QUOTED; if( read ) builder.append( current ); break; case FieldState.QUOTED_ESCAPE ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ state = 
FieldState.QUOTED; if( read ) builder.append( current ); break; /* ***************************** */ /* *** FieldState.QUOTED_END *** */ /* ***************************** */ case FieldState.QUOTED_END ^ CharacterClass.NORMAL: /* Normal character outside a quoted field. */ throw new MalformedCSVException( "Encountered a normal character outside a quoted field." ); case FieldState.QUOTED_END ^ CharacterClass.TO_IGNORE: break; case FieldState.QUOTED_END ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: break; case FieldState.QUOTED_END ^ CharacterClass.QUOTE: /* Quote character outside a quoted field. */ throw new MalformedCSVException( "Encountered a quote character outside a quoted field." ); case FieldState.QUOTED_END ^ CharacterClass.ESCAPE: /* Escape character outside a quoted field. */ throw new MalformedCSVException( "Encountered an escape character outside a quoted field." ); case FieldState.QUOTED_END ^ CharacterClass.FIELD_SEPARATOR: return FieldEndReason.FIELD_SEPARATOR; case FieldState.QUOTED_END ^ CharacterClass.RECORD_SEPARATOR: return FieldEndReason.RECORD_SEPARATOR; case FieldState.QUOTED_END ^ CharacterClass.RECORD_SEPARATOR_1: if ( checkRecordSequence( current ) ) return FieldEndReason.RECORD_SEPARATOR; /* Handle as CharacterClass.NORMAL */ /* Normal character outside a quoted field. */ throw new MalformedCSVException( "Invalid record separator sequence." ); case FieldState.QUOTED_END ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ /* Normal character outside a quoted field. */ throw new MalformedCSVException( "Invalid record separator sequence." ); /* ******************************* */ /* *** FieldState.DOUBLE_QUOTE *** */ /* ******************************* */ case FieldState.DOUBLE_QUOTE ^ CharacterClass.NORMAL: /* Normal character outside a quoted field. *//* * character è considerato RECORD_SEPARATOR_1 */ throw new MalformedCSVException( "Encountered a normal character outside a quoted field." 
); case FieldState.DOUBLE_QUOTE ^ CharacterClass.TO_IGNORE: state = FieldState.QUOTED_END; break; case FieldState.DOUBLE_QUOTE ^ CharacterClass.TO_IGNORE_AROUND_FIELDS: state = FieldState.QUOTED_END; break; case FieldState.DOUBLE_QUOTE ^ CharacterClass.QUOTE: state = FieldState.QUOTED; if( read ) builder.append(current); break; case FieldState.DOUBLE_QUOTE ^ CharacterClass.ESCAPE: /* Escape character outside a quoted field. */ throw new MalformedCSVException( "Encountered an escape character outside a quoted field." ); case FieldState.DOUBLE_QUOTE ^ CharacterClass.FIELD_SEPARATOR: return FieldEndReason.FIELD_SEPARATOR; /* * character is considered as RECORD_SEPARATOR_1 */ case FieldState.DOUBLE_QUOTE ^ CharacterClass.RECORD_SEPARATOR: return FieldEndReason.RECORD_SEPARATOR; case FieldState.DOUBLE_QUOTE ^ CharacterClass.RECORD_SEPARATOR_1: if ( checkRecordSequence( current ) ) return FieldEndReason.RECORD_SEPARATOR; /* Handle as CharacterClass.NORMAL */ /* Normal character outside a quoted field. */ throw new MalformedCSVException( "Invalid record separator sequence." ); case FieldState.DOUBLE_QUOTE ^ CharacterClass.RECORD_SEPARATOR_2: /* Handle as CharacterClass.NORMAL */ /* Normal character outside a quoted field. */ throw new MalformedCSVException( "Invalid record separator sequence." ); default: throw new IllegalStateException( "Unknown couple FieldState " + state + " CharacterClass " + type + ". This is a bug evidence." ); } } } /** * Check if there is a correct record separator sequence following a * character of type {@link CharacterClass.RECORD_SEPARATOR_1}. *

* Provided character is assumed a * {@link CharacterClass.RECORD_SEPARATOR_1} and no further checks * will be done on his type. * *

* * @param character first character of a record separator sequence * @return {@code true} if a complete record separator sequence has been * read. * @throws IOException if an error occurs while parsing data. */ private boolean checkRecordSequence( final int character ) throws IOException { /* * Read next character to discover if is a second character record * separator. */ /* * Check if buffer need to be refilled. Checking on BUFFER_SIZE * instead of bufferElements because is faster on a constant value * AND if bufferElements is less than BUFFER_SIZE it can't be * refilled (data already completely read into buffer) */ if ( bufferIndex >= BUFFER_SIZE ) { bufferElements = reader.read( buffer, 0 , BUFFER_SIZE ); bufferIndex = 0; } /* No more elements can be read. */ if( bufferElements == -1 || bufferIndex >= bufferElements ) return false; /* Read next character. To be used as types array index. */ final char next = buffer[ bufferIndex ]; final int type; /* Find the next character class. */ if( next < RemarkableASCII.ASCII_TABLE_SIZE ) type = types[ next ]; else type = CharacterClass.NORMAL; if ( type == CharacterClass.RECORD_SEPARATOR_2 ) { /* Found a complete record separator. */ ++bufferIndex; return true; } /* Not a record separator. */ return false; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy