All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.util.io.DelimitedTextParser Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.util.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

import com.hfg.util.StringUtil;
import com.hfg.util.collection.DataTable;

//------------------------------------------------------------------------------
/**
 Base class for CSV (comma-separated value) and TSV (tab-separated value).
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class DelimitedTextParser { private char mDelimiter; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public DelimitedTextParser(char inDelimiter) { mDelimiter = inDelimiter; } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public String escapeField(String inField) { String result = inField; if (StringUtil.isSet(result) && (result.contains("\"") || result.contains(mDelimiter + ""))) { result = "\"" + result.replaceAll("\"", "\"\"") + "\""; } return result; } //--------------------------------------------------------------------------- public DataTable 
parseToDataTable(List inLines) throws IOException { List lines = parse(inLines); return new DataTable(lines); } //--------------------------------------------------------------------------- public DataTable parseToDataTable(Reader inReader) throws IOException { List lines = parse(inReader); return new DataTable(lines); } //--------------------------------------------------------------------------- public List parse(Reader inReader) throws IOException { List parsedLines = new ArrayList<>(); BufferedReader bufferedReader = null; try { if (inReader instanceof BufferedReader) { bufferedReader = (BufferedReader) inReader; } else { bufferedReader = new BufferedReader(inReader); } String line; while ((line = bufferedReader.readLine()) != null) { parsedLines.add(parseLine(line)); } } finally { StreamUtil.close(bufferedReader); } return parsedLines; } //--------------------------------------------------------------------------- public List parse(List inLines) throws IOException { List parsedLines = new ArrayList<>(); for (String line : inLines) { parsedLines.add(parseLine(line)); } return parsedLines; } //--------------------------------------------------------------------------- public String[] parseLine(String inLine) throws IOException { List fields = new ArrayList<>(); boolean inQuotedValue = false; int quoteCount = 0; char currentQuoteChar = ' '; StringBuilder currentValue = new StringBuilder(); int index = 0; while (index < inLine.length()) { int theChar = inLine.charAt(index); if (inQuotedValue) { if (theChar == currentQuoteChar) { quoteCount++; if (2 == quoteCount) { // Skip quoteCount = 0; } else if ((index == inLine.length() - 1 || inLine.charAt(index + 1) != currentQuoteChar) && (0 == currentValue.length() || currentValue.charAt(currentValue.length() - 1) != '\\')) { inQuotedValue = false; String unescapedValue = StringUtil.replaceAll(currentValue, "\\" + currentQuoteChar, currentQuoteChar + ""); currentValue.setLength(0); currentValue.append(unescapedValue); } 
else { currentValue.append((char) theChar); } } else { currentValue.append((char) theChar); quoteCount = 0; } } else if (theChar == mDelimiter) { fields.add(currentValue.length() > 0 ? currentValue.toString().trim() : null); currentValue.setLength(0); } else if (Character.isWhitespace(theChar) && 0 == currentValue.length()) { // Skip whitespace between the comma and the value } else if ((theChar == '\'' || theChar == '\"') && 0 == currentValue.length()) { // Start of a quoted value inQuotedValue = true; quoteCount = 0; currentQuoteChar = (char) theChar; } else { currentValue.append((char) theChar); } index++; } fields.add(currentValue.length() > 0 ? currentValue.toString().trim() : null); return fields.toArray(new String[] {}); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy