All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.libutil.CsvParser Maven / Gradle / Ivy

There is a newer version: 1.20.0
Show newest version
/*
 * The MIT License
 *
 * Copyright 2021 Takashi Harano
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package com.libutil;

import java.util.ArrayList;
import java.util.List;

/**
 * Parses a CSV text.
 *
 * <p>This parser is meant to parse according to the RFC 4180 specification:
 * https://www.ietf.org/rfc/rfc4180.txt
 *
 * <p>The input is examined one character at a time and each character is
 * compared with the configured separator and quotation strings, so only
 * single-character separators and quotations can ever match.
 */
public class CsvParser {

    private static final String DEFAULT_SEPARATOR = ",";
    private static final String DEFAULT_QUOTATION = "\"";
    private static final String LINE_FEED = "\n";

    /** Field state: the current field is treated as a plain, unquoted field. */
    private static final int NOT_ENVELOPED = 0;
    /** Field state: the field started with a quotation but no other character has been seen yet. */
    private static final int ENVELOPE_PENDING = -1;
    /** Field state: the field started with a quotation and is treated as a quoted field. */
    private static final int ENVELOPED = 1;

    private String separator;
    private String quotation;
    private String escapedQuotation;
    private String quotedEmpty;

    /**
     * Constructs a CSV parser with the separator ",".
     */
    public CsvParser() {
        this(DEFAULT_SEPARATOR);
    }

    /**
     * Constructs a CSV parser with the specified separator.
     *
     * @param separator
     *            the character that the parser will treat as the separator
     */
    public CsvParser(String separator) {
        this(separator, DEFAULT_QUOTATION);
    }

    /**
     * Constructs a CSV parser with the specified separator and quotation.
     *
     * @param separator
     *            the character that the parser will treat as the separator
     * @param quotation
     *            the character for quote
     */
    public CsvParser(String separator, String quotation) {
        this.separator = separator;
        setQuotation(quotation);
    }

    /**
     * Parses the CSV text and returns an array of elements.
     *
     * <p>Line breaks written as CR LF or CR are converted to LF before parsing,
     * including those inside quoted fields. A final record that consists of a
     * single empty field is dropped (so a trailing line break does not produce
     * an extra row) unless the text ends with an explicitly quoted empty field.
     *
     * @param csvText
     *            The string to parse
     * @return The list of elements. [ROW][COL]
     */
    public String[][] parse(String csvText) {
        String text = normalizeLineBreaks(csvText);
        List<List<String>> rows = new ArrayList<>();
        List<String> lastRow = scanFields(text, true, rows);

        boolean soleEmptyField = (lastRow.size() == 1) && "".equals(lastRow.get(0));
        if (!soleEmptyField || isLastLineEmpty(text)) {
            storeRow(rows, lastRow);
        }

        String[][] result = new String[rows.size()][];
        for (int r = 0; r < result.length; r++) {
            result[r] = rows.get(r).toArray(new String[0]);
        }
        return result;
    }

    /**
     * Parses one record of the CSV text and returns an array of the elements.
     * Generally, a record is one row of data.
     *
     * <p>Line feeds are not treated as record delimiters here; they stay part
     * of the field they occur in (after CR LF / CR have been converted to LF).
     * The result always contains at least one element.
     *
     * @param csvText
     *            one row of the CSV
     * @return The list of elements
     */
    public String[] parseOneRecord(String csvText) {
        String text = normalizeLineBreaks(csvText);
        // No row splitting is requested, so the row sink is never touched.
        List<String> cols = scanFields(text, false, null);
        return cols.toArray(new String[0]);
    }

    /**
     * Returns the character that the parser will treat as the separator.
     *
     * @return the separator
     */
    public String getSeparator() {
        return separator;
    }

    /**
     * Sets the character that the parser will treat as the separator.
     *
     * @param separator
     *            the separator
     */
    public void setSeparator(String separator) {
        this.separator = separator;
    }

    /**
     * Returns the character that the parser will treat as the quotation.
     *
     * @return the quotation
     */
    public String getQuotation() {
        return quotation;
    }

    /**
     * Sets the character that the parser will treat as the quotation.
     *
     * @param quotation
     *            the quotation
     */
    public void setQuotation(String quotation) {
        this.quotation = quotation;
        // A doubled quotation is both the escape sequence inside a quoted field
        // and, when it is the whole field, the notation for an empty value.
        this.escapedQuotation = quotation + quotation;
        this.quotedEmpty = quotation + quotation;
    }

    /** Converts CR LF and lone CR line breaks to LF. */
    private static String normalizeLineBreaks(String s) {
        return s.replace("\r\n", LINE_FEED).replace("\r", LINE_FEED);
    }

    /**
     * Splits the text into field values; shared by {@link #parse(String)} and
     * {@link #parseOneRecord(String)}.
     *
     * @param text
     *            the text to scan, with line breaks already normalized to LF
     * @param splitRows
     *            when true, a line feed outside a quoted field ends the current
     *            record and the completed record is added to {@code rows}
     * @param rows
     *            receives completed records; only used when {@code splitRows}
     *            is true
     * @return the fields of the last (not yet stored) record
     */
    private List<String> scanFields(String text, boolean splitRows, List<List<String>> rows) {
        List<String> cols = new ArrayList<>();
        int enveloped = NOT_ENVELOPED;
        int quotCount = 0;
        int colBeginIndex = 0;
        int len = text.length();

        for (int i = 0; i < len; i++) {
            String c = String.valueOf(text.charAt(i));

            if (c.equals(quotation)) {
                quotCount++;
                if (colBeginIndex == i) {
                    // The field starts with a quotation; whether it really is a
                    // quoted field is decided at the next ordinary character.
                    enveloped = ENVELOPE_PENDING;
                }
                continue;
            }

            boolean lineBreak = splitRows && LINE_FEED.equals(c);
            if (!separator.equals(c) && !lineBreak) {
                if (enveloped == ENVELOPE_PENDING) {
                    // An odd number of leading quotations opens a quoted field;
                    // an even number is just escaped quotation characters.
                    enveloped = (quotCount % 2 == 0) ? NOT_ENVELOPED : ENVELOPED;
                }
                continue;
            }

            // c is a field delimiter candidate (separator or record-ending LF).
            if ((enveloped == ENVELOPED) && !isQuotedProperly(text, len, i, quotCount)) {
                enveloped = NOT_ENVELOPED;
            }
            // With an odd quotation count the delimiter is still inside an open
            // quoted field and therefore belongs to the field value.
            if ((enveloped == NOT_ENVELOPED) || (quotCount % 2 == 0)) {
                cols.add(getColumnValue(text, colBeginIndex, i, enveloped));
                colBeginIndex = i + 1;
                enveloped = NOT_ENVELOPED;
                quotCount = 0;
                if (lineBreak) {
                    storeRow(rows, cols);
                    cols = new ArrayList<>();
                }
            }
        }

        int lastIndex = len - 1;
        if (lastIndex > 0) {
            String lastCh = String.valueOf(text.charAt(lastIndex));
            if ((enveloped != NOT_ENVELOPED) && !quotation.equals(lastCh)) {
                // The last field opened with a quotation but the text does not
                // end with one: take it unquoted and skip the opening quotation.
                enveloped = NOT_ENVELOPED;
                colBeginIndex++;
            }
        }
        cols.add(getColumnValue(text, colBeginIndex, len, enveloped));
        return cols;
    }

    /**
     * Extracts the field value located at {@code [colBeginIndex, p)}.
     *
     * <p>A field that is exactly a doubled quotation yields the empty string.
     * Otherwise the surrounding quotations are removed when the field was
     * treated as quoted, and every doubled quotation is unescaped.
     */
    private String getColumnValue(String s, int colBeginIndex, int p, int enveloped) {
        String val = s.substring(colBeginIndex, p);
        if (quotedEmpty.equals(val)) {
            return "";
        }
        if ((enveloped != NOT_ENVELOPED) && (val.length() >= 2)) {
            val = val.substring(1, val.length() - 1);
        }
        return val.replace(escapedQuotation, quotation);
    }

    /** Appends a completed record to the row list. */
    private void storeRow(List<List<String>> rows, List<String> cols) {
        rows.add(cols);
    }

    /**
     * Tells whether the text ends with an explicitly quoted empty field.
     *
     * <p>Only texts shorter than two characters are rejected up front, so a
     * text that consists solely of a quoted empty field is recognised too.
     */
    private boolean isLastLineEmpty(String s) {
        if (s.length() < 2) {
            return false;
        }
        return quotedEmpty.equals(s.substring(s.length() - 2));
    }

    /**
     * Checks a quoted field when a delimiter is met at {@code pos}.
     *
     * <p>Returns false only when the quotation count is even, the delimiter is
     * not the last character of the text, and the character right before the
     * delimiter is not a quotation; in every other case it returns true.
     */
    private boolean isQuotedProperly(String s, int len, int pos, int quotCount) {
        if ((quotCount % 2 != 0) || (pos + 1 >= len)) {
            return true;
        }
        return quotation.equals(String.valueOf(s.charAt(pos - 1)));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy