All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.Ostermiller.util.LabeledCSVParser Maven / Gradle / Ivy

Go to download

Open source (GPL) Java utilities maintained by Stephen Ostermiller with help from many contributors.

The newest version!
/*
 * Read files in comma separated value format with a first line of labels.
 *
 * Copyright (C) 2004 Campbell, Allen T. 
 *
 * Copyright (C) 2004 Stephen Ostermiller
 * http://ostermiller.org/contact.pl?regarding=Java+Utilities
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See COPYING.TXT for details.
 */
package com.Ostermiller.util;

import java.io.IOException;
import java.util.*;

/**
 * Decorate a CSVParse object to provide an index of field names.  Many (most?)
 * CSV files have a list of field names (labels) as the first line.  A
 * LabeledCSVParser will consume this line automatically.  The methods
 * {@link #getLabels()}, {@link #getLabelIndex(String)} and
 * {@link #getValueByLabel(String)} allow these labels to be discovered and
 * used while parsing CSV data.  This class can also be used to conveniently
 * ignore field labels if they happen to be present in a CSV file and are not
 * desired.
 *
 * @author Campbell, Allen T. 
 * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
 * @since ostermillerutils 1.03.00
 */
public class LabeledCSVParser implements CSVParse {

	/**
	 * Class which actually does the parsing.  Called for most methods.
	 *
	 * @since ostermillerutils 1.03.00
	 */
	private CSVParse parse;

	/**
	 * The first line of the CSV file - treated specially as labels.
	 * Set by setLabels.
	 *
	 * @since ostermillerutils 1.03.00
	 */
	private String[] labels;

	/**
	 * Hash of the labels (String) to column number (Integer).
	 * Set by setLabels.
	 *
	 * @since ostermillerutils 1.03.00
	 */
	private Map labelMap;

	/**
	 * The last line read from the CSV file.  Saved for getValueByLabel().
	 *
	 * @since ostermillerutils 1.03.00
	 */
	private String[] lastLine;

	/**
	 * Set whenever nextValue is called and checked when getValueByLabel() is
	 * called to enforce incompatibility between the methods.
	 *
	 * @since ostermillerutils 1.03.00
	 */
	private int nextValueLine = -2;

	/**
	 * Construct a LabeledCSVParser on a CSVParse implementation.
	 *
	 * @param parse CSVParse implementation
	 * @throws IOException if an error occurs while reading.
	 *
	 * @since ostermillerutils 1.03.00
	 */
	@SuppressWarnings("unused") public LabeledCSVParser(CSVParse parse) throws IOException {
		this.parse = parse;
	}

	/**
	 * Change this parser so that it uses a new delimiter.
	 * 

* The initial character is a comma, the delimiter cannot be changed * to a quote or other character that has special meaning in CSV. * * @param newDelim delimiter to which to switch. * @throws BadDelimiterException if the character cannot be used as a delimiter. * * @since ostermillerutils 1.03.00 */ public void changeDelimiter(char newDelim) throws BadDelimiterException { parse.changeDelimiter(newDelim); } /** * Change this parser so that it uses a new character for quoting. *

* The initial character is a double quote ("), the delimiter cannot be changed * to a comma or other character that has special meaning in CSV. * * @param newQuote character to use for quoting. * @throws BadQuoteException if the character cannot be used as a quote. * * @since ostermillerutils 1.03.00 */ public void changeQuote(char newQuote) throws BadQuoteException { parse.changeQuote(newQuote); } /** * Get all the values from the file. *

* If the file has already been partially read, only the * values that have not already been read will be included. *

* Each line of the file that has at least one value will be * represented. Comments and empty lines are ignored. *

* The resulting double array may be jagged. *

* The last line of the values is saved and may be accessed * by getValueByLabel(). * * @return all the values from the file or null if there are no more values. * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ public String[][] getAllValues() throws IOException { if (labels == null) setLabels(); String[][] allValues = parse.getAllValues(); if (allValues == null){ lastLine = null; } else { lastLine = allValues[allValues.length-1]; } return allValues; } /** * Get the line number that the last token came from. *

* New line breaks that occur in the middle of a token are not * counted in the line number count. *

* The first line of labels does not count towards the line number. * * @return line number or -1 if no tokens have been returned yet. * * @since ostermillerutils 1.03.00 */ public int getLastLineNumber(){ return lastLineNumber(); } /** * Get the line number that the last token came from. *

* New line breaks that occur in the middle of a token are not * counted in the line number count. *

* The first line of labels does not count towards the line number. * * @return line number or -1 if no tokens have been returned yet. * * @since ostermillerutils 1.03.00 */ public int lastLineNumber(){ int lineNum = parse.getLastLineNumber(); if (lineNum <= -1) return -1; // Nothing has been read yet if (lineNum == 1) return -1; // only labels have been read return lineNum - 1; // adjust line number to account for the label line } /** * Get all the values from a line. *

* If the line has already been partially read, only the values that have not * already been read will be included. *

* In addition to returning all the values from a line, LabeledCSVParser * maintains a buffer of the values. This feature allows * {@link #getValueByLabel(String)} to function. In this case * {@link #getLine()} is used simply to iterate CSV data. The iteration ends * when null is returned. *

* Note: The methods {@link #nextValue()} and {@link #getAllValues()} * are incompatible with {@link #getValueByLabel(String)} because the former * methods cause the offset of field values to shift and corrupt the internal * buffer maintained by {@link #getLine}. * * @return all the values from the line or null if there are no more values. * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ public String[] getLine() throws IOException { if (labels == null) setLabels(); lastLine = parse.getLine(); return lastLine; } /** * Read the next value from the file. The line number from * which this value was taken can be obtained from getLastLineNumber(). *

* This method is not compatible with getValueByLabel(). Using this * method will make getValueByLabel() throw an IllegalStateException * for the rest of the line. * * @return the next value or null if there are no more values. * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ public String nextValue() throws IOException { if (labels == null) setLabels(); String nextValue = parse.nextValue(); nextValueLine = getLastLineNumber(); return nextValue; } /** * Initialize the LabeledCSVParser.labels member and LabeledCSVParser.labelMap * member. * * @throws IOException if an IO error occurs * * @since ostermillerutils 1.03.00 */ private void setLabels() throws IOException { labels = parse.getLine(); if (labels == null) return; labelMap = new HashMap(); for (int i = 0; i < labels.length; i++){ labelMap.put(labels[i], new Integer(i)); } } /** * Return an array of all field names from the top * of the CSV file. * * @return Field names. * @throws IOException if an IO error occurs * * @since ostermillerutils 1.03.00 */ public String[] getLabels() throws IOException { if (labels == null) setLabels(); return labels; } /** * Get the index of the column having the given label. * The {@link #getLine()} method returns an * array of field values for a single record of data. This method returns * the index of a member of that array based on the specified field name. * The first field has the index 0. * * @param label The field name. * @return The index of the field name, or -1 if the label does not exist. * @deprecated may swallow an IOException while reading the labels - please use getLabelIdx() * * @since ostermillerutils 1.03.00 */ @Deprecated public int getLabelIndex(String label){ try { return getLabelIdx(label); } catch (IOException iox){ return -1; } } /** * Get the index of the column having the given label. * The {@link #getLine()} method returns an * array of field values for a single record of data. 
This method returns * the index of a member of that array based on the specified field name. * The first field has the index 0. * * @param label The field name. * @return The index of the field name, or -1 if the label does not exist. * @throws IOException if an IO error occurs * * @since ostermillerutils 1.04.02 */ public int getLabelIdx(String label) throws IOException { if (labels == null) setLabels(); if (labelMap == null) return -1; if (!labelMap.containsKey(label)) return -1; return (labelMap.get(label)).intValue(); } /** * Given the label for the column, get the column from the last line that * was read. If the column cannot be found in the line, null is returned. * * @param label The field name. * @throws IllegalStateException if nextValue has been called as part of getting the last line. nextValue is not compatible with this method. * @return the value from the last line read or null if there is no such value * * @since ostermillerutils 1.03.00 */ public String getValueByLabel(String label) throws IllegalStateException { if (nextValueLine == getLastLineNumber()) throw new IllegalStateException("nextValue() was used to get values from this line."); if (lastLine == null) return null; int fieldIndex; try { fieldIndex = getLabelIdx(label); } catch (IOException iox){ // Can't happen here because the labels have been read before the first line. throw new RuntimeException(iox); } if (fieldIndex == -1) return null; if (fieldIndex >= lastLine.length) return null; return lastLine[fieldIndex]; } /** * Close any stream upon which this parser is based. * * @throws IOException if an error occurs while closing the stream. * * @since ostermillerutils 1.03.00 */ public void close() throws IOException { parse.close(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy