All downloads are free. The search and download features use the official Maven repository.

org.sfm.csv.parser.CsvReader Maven / Gradle / Ivy

package org.sfm.csv.parser;

import java.io.IOException;
import java.io.Reader;

/**
 * Streaming CSV tokenizer: reads characters from a {@link Reader} into a
 * growable buffer and reports cell and row boundaries to a
 * {@code CharsCellHandler}.
 *
 * <p>Cells are separated by {@code ','}; rows end on {@code '\n'}, {@code '\r'}
 * or {@code "\r\n"}. A cell whose first character is {@code '"'} is treated as
 * quoted, and separators inside it are ignored until the quote is closed.
 * Cell content is handed to the handler as a raw slice of the internal buffer;
 * quotes are not stripped or unescaped by this class.
 *
 * <p>The instance holds mutable parsing state and performs no synchronization.
 */
public final class CsvReader {

	/** State: the handler's {@code endOfRow()} returned {@code false}; parsing stops. */
	public static final int STOP = 8;
	/** State: inside a quoted cell; separators and line ends are part of the cell. */
	public static final int IN_QUOTE = 4;
	/** State: the previous character was a row-ending {@code '\r'}. */
	public static final int IN_CR = 2;
	/** State: a quoted cell has been closed ({@code IN_QUOTE ^ ALL_QUOTES}). */
	public static final int QUOTE = 1;
	/** State: nothing special; set at the start of every cell. */
	public static final int NONE = 0;

	/** Mask that clears the {@link #IN_CR} bit and keeps every other bit. */
	public static final int TURN_OFF_IN_CR_MASK = ~IN_CR;
	/** XOR-ing the state with this mask toggles between {@link #IN_QUOTE} and {@link #QUOTE}. */
	public static final int ALL_QUOTES = QUOTE | IN_QUOTE;

	private char[] buffer;
	private final CharsCellHandler handler;
	private final Reader reader;
	/** Number of valid characters in {@link #buffer} for the current pass. */
	private int bufferLength;

	private int currentState = NONE;

	/** Index in {@link #buffer} of the first character of the cell being read. */
	private int currentStart = 0;
	/** Number of characters of an unfinished cell carried over to the front of the buffer. */
	private int bufferOffset = 0;

	/**
	 * @param bufferSize initial buffer capacity in chars; must be strictly positive
	 * @param handler    receiver of cell / row / end notifications
	 * @param reader     character source; it is not closed by this class
	 * @throws IllegalArgumentException if {@code bufferSize} is not strictly positive
	 */
	public CsvReader(final int bufferSize, final CharsCellHandler handler, final Reader reader) {
		// A zero-length buffer would make every read request 0 chars and return 0
		// forever, so parse() would never terminate.
		if (bufferSize <= 0) {
			throw new IllegalArgumentException("bufferSize must be > 0 but was " + bufferSize);
		}
		this.buffer = new char[bufferSize];
		this.handler = handler;
		this.reader = reader;
	}

	/**
	 * Parses the CSV content until the reader is exhausted or the handler asks
	 * to stop by returning {@code false} from {@code endOfRow()}.
	 *
	 * <p>If input ends in the middle of a cell, that cell is still passed to
	 * {@code newCell}; no {@code endOfRow()} call is made for it. {@code end()}
	 * is always invoked last.
	 *
	 * @throws IOException if the underlying reader fails
	 */
	public void parse()
			throws IOException {
		// Test STOP before reading: once the handler asked to stop there is no
		// reason to pull (and possibly block on) another chunk of input.
		while (currentState != STOP
				&& (bufferLength = reader.read(buffer, bufferOffset, buffer.length
						- bufferOffset)) != -1) {
			bufferLength += bufferOffset;
			consumeBytes();
			shiftBuffer();
		}

		// Flush the trailing cell when the input does not end with a line break.
		if (bufferOffset > 0 && currentState != STOP) {
			handler.newCell(buffer, 0, bufferOffset);
		}
		handler.end();
	}

	/**
	 * Scans the characters added by the latest read. The first
	 * {@code bufferOffset} characters were already scanned during the previous
	 * pass (the state they produced is still in {@code currentState}), so
	 * scanning resumes right after them instead of replaying them.
	 */
	private void consumeBytes() {
		int bufferIndex = bufferOffset;
		while (bufferIndex < bufferLength && currentState != STOP) {
			handleChar(buffer[bufferIndex], bufferIndex);
			bufferIndex++;
		}
	}

	/** Dispatches one character and updates the parsing state. */
	private void handleChar(final char c, final int bufferIndex) {
		switch (c) {
		case ',':
			newCellIfNotInQuote(bufferIndex);
			break;
		case '\n':
			handleEndOfLineLF(bufferIndex);
			break;
		case '\r':
			handleEndOfLineCR(bufferIndex);
			// return, not break: the IN_CR flag must survive until the next char
			return;
		case '"':
			quote(bufferIndex);
			break;
		}
		// Any character other than '\r' ends a potential "\r\n" sequence.
		currentState = currentState & TURN_OFF_IN_CR_MASK;
	}

	private void newCellIfNotInQuote(final int bufferIndex) {
		if (isInQuote())
			return;

		newCell(bufferIndex);
	}

	/** Handles {@code '\n'}: ends the row unless it completes a {@code "\r\n"} pair. */
	private void handleEndOfLineLF(final int bufferIndex) {
		if (isInQuote())
			return;

		if (currentState != IN_CR) {
			endOfRow(bufferIndex);
		} else {
			// the row was already ended by the '\r'; just skip the '\n'
			currentStart++;
		}
	}

	private boolean isInQuote() {
		return currentState == IN_QUOTE;
	}

	/** Handles {@code '\r'}: ends the row and remembers it so a following {@code '\n'} is skipped. */
	private void handleEndOfLineCR(final int bufferIndex) {
		if (isInQuote())
			return;

		endOfRow(bufferIndex);
		// Do not overwrite a STOP requested by the handler in endOfRow().
		if (currentState != STOP) {
			currentState = IN_CR;
		}
	}

	/** Emits the last cell of the row and notifies the handler; switches to STOP if it returns false. */
	private void endOfRow(int bufferIndex) {
		newCell(bufferIndex);
		if (!handler.endOfRow()) {
			currentState = STOP;
		}
	}

	/**
	 * Handles {@code '"'}: a quote at the very start of a cell opens a quoted
	 * cell; any other quote toggles the quote bits.
	 */
	private void quote(final int i) {
		if (currentStart == i) {
			currentState = IN_QUOTE;
		} else {
			currentState = currentState ^ ALL_QUOTES;
		}
	}

	/** Emits the cell {@code [currentStart, i)} and starts a new cell after the separator at {@code i}. */
	private void newCell(final int i) {
		handler.newCell(buffer, currentStart, i - currentStart);
		currentStart = i + 1;
		currentState = NONE;
	}

	/**
	 * Moves the characters of the unfinished cell to the front of the buffer so
	 * the next read appends after them, growing the buffer when the leftover
	 * takes more than half of its capacity.
	 */
	private void shiftBuffer() {
		// number of chars belonging to the cell still being read
		bufferOffset = bufferLength - currentStart;

		// Compare against the capacity, not the amount just read: a Reader may
		// return fewer chars than requested, and growing on every short read
		// would double the buffer over and over.
		if (bufferOffset > (buffer.length >> 1)) {
			// double buffer size
			char[] newbuffer = new char[buffer.length << 1];
			System.arraycopy(buffer, currentStart, newbuffer, 0, bufferOffset);
			buffer = newbuffer;
		} else {
			System.arraycopy(buffer, currentStart, buffer, 0, bufferOffset);
		}
		currentStart = 0;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy