All downloads are free. The search and download features use the official Maven repository.

com.hfg.util.io.DelimitedTextParser Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.util.io;


import com.hfg.exception.DataParsingException;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.DataColumn;
import com.hfg.util.collection.DataTable;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

//------------------------------------------------------------------------------
/**
 Base class for CSV (comma-separated value) and TSV (tab-separated value).
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class DelimitedTextParser { private char mDelimiter; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public DelimitedTextParser(char inDelimiter) { mDelimiter = inDelimiter; } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public String escapeField(String inField) { String result = inField; if (StringUtil.isSet(result) && (result.contains("\"") || result.contains(mDelimiter + ""))) { result = "\"" + result.replaceAll("\"", "\"\"") + "\""; } return result; } //--------------------------------------------------------------------------- public DataTable 
parseToDataTable(Reader inReader) throws IOException { List lines = parse(inReader); DataTable dataTable = new DataTable(); Map colMap = new HashMap<>(10); boolean headerParsed = false; int rowIndex = 0; for (String[] fields : lines) { if (! headerParsed) { // Skip blank lines if (1 == fields.length && ! StringUtil.isSet(fields[0])) { continue; } for (int i = 0; i < fields.length; i++) { String field = fields[i].trim(); DataColumn col = new DataColumn(field); colMap.put(i, col); } headerParsed = true; } else { rowIndex++; if (fields.length > colMap.size()) { throw new DataParsingException("Row " + rowIndex + " has more fields (" + fields.length + ") than the number of columns (" + colMap.size() + ")!"); } for (int i = 0; i < fields.length; i++) { String fieldString = fields[i]; Comparable field = null; if (fieldString != null) { fieldString = fieldString.trim(); if (StringUtil.isNumber(fieldString)) { try { if (fieldString.contains(".")) { field = Double.parseDouble(fieldString); } else if (fieldString.length() > 9) { field = Long.parseLong(fieldString); } else { field = Integer.parseInt(fieldString); } } catch (NumberFormatException e) { field = fieldString; } } else { field = fieldString; } } dataTable.put(rowIndex + "", colMap.get(i), field); } } } return dataTable; } //--------------------------------------------------------------------------- public List parse(Reader inReader) throws IOException { List parsedLines = new ArrayList<>(); BufferedReader bufferedReader = null; try { if (inReader instanceof BufferedReader) { bufferedReader = (BufferedReader) inReader; } else { bufferedReader = new BufferedReader(inReader); } String line; while ((line = bufferedReader.readLine()) != null) { parsedLines.add(parseLine(line)); } } finally { StreamUtil.close(bufferedReader); } return parsedLines; } //--------------------------------------------------------------------------- public String[] parseLine(String inLine) throws IOException { List fields = new ArrayList<>(); 
boolean inQuotedValue = false; int quoteCount = 0; char currentQuoteChar = ' '; StringBuilder currentValue = new StringBuilder(); int index = 0; while (index < inLine.length()) { int theChar = inLine.charAt(index); if (inQuotedValue) { if (theChar == currentQuoteChar) { quoteCount++; if (2 == quoteCount) { // Skip quoteCount = 0; } else if ((index == inLine.length() - 1 || inLine.charAt(index + 1) != currentQuoteChar) && (0 == currentValue.length() || currentValue.charAt(currentValue.length() - 1) != '\\')) { inQuotedValue = false; String unescapedValue = StringUtil.replaceAll(currentValue, "\\" + currentQuoteChar, currentQuoteChar + ""); currentValue.setLength(0); currentValue.append(unescapedValue); } else { currentValue.append((char) theChar); } } else { currentValue.append((char) theChar); quoteCount = 0; } } else if (theChar == mDelimiter) { fields.add(currentValue.length() > 0 ? currentValue.toString().trim() : null); currentValue.setLength(0); } else if (Character.isWhitespace(theChar) && 0 == currentValue.length()) { // Skip whitespace between the comma and the value } else if ((theChar == '\'' || theChar == '\"') && 0 == currentValue.length()) { // Start of a quoted value inQuotedValue = true; quoteCount = 0; currentQuoteChar = (char) theChar; } else { currentValue.append((char) theChar); } index++; } fields.add(currentValue.length() > 0 ? currentValue.toString().trim() : null); return fields.toArray(new String[] {}); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy