All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.freeutils.util.CSVParser Maven / Gradle / Ivy

The newest version!
/*
 *  Copyright © 2003-2024 Amichai Rothman
 *
 *  This file is part of JElementary - the Java Elementary Utilities package.
 *
 *  JElementary is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  JElementary is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with JElementary.  If not, see <https://www.gnu.org/licenses/>.
 *
 *  For additional info see https://www.freeutils.net/source/jelementary/
 */

package net.freeutils.util;

import static net.freeutils.util.CSVParser.State.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * The {@code CSVParser} class parses CSV (comma separated values) content.
 * <p>
 * Note: there are many variations on the CSV format used in existing applications.
 * This implementation follows the format defined in
 * <a href="https://www.rfc-editor.org/rfc/rfc4180">RFC 4180</a>, with the addition
 * of allowing a single LF character to terminate a line, in addition to the standard CRLF.
 */
public class CSVParser implements Iterable<String[]>, Closeable {

    /** The parsing state within the value that is currently being read. */
    protected enum State {
        /** At the start of a value; no character of it has been consumed yet. */
        START,
        /** Inside an unquoted value. */
        MIDDLE,
        /** Inside a quoted value. */
        QUOTE,
        /** Right after a quote seen inside a quoted value (closing quote or first half of an escaped quote). */
        QUOTE2
    }

    /** The underlying CSV character stream. */
    protected BufferedReader in;
    /** Whether {@link #nextRecord} has been invoked at least once. */
    protected boolean read;
    /** The cached header (first record), or null if it has not been read. */
    protected String[] header;
    /** The ordered column indices to return, or null to return full records. */
    protected int[] columns;

    /**
     * Escapes the given value by surrounding it with double quotes, and escaping
     * any double-quote character within it with another double-quote character,
     * in accordance with the CSV format.
     *
     * @param value the value to escape (null is treated as an empty value)
     * @return the escaped value
     */
    public static String escape(String value) {
        if (value == null || value.isEmpty()) {
            return "\"\"";
        }
        return '"' + value.replace("\"", "\"\"") + '"';
    }

    /**
     * Converts the given individual values into a single CSV-formatted line,
     * terminated by CRLF. Every value is quoted using {@link #escape}.
     *
     * @param values the values (columns)
     * @return the CSV-formatted line
     */
    public static String toLine(String... values) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                line.append(',');
            }
            line.append(escape(values[i]));
        }
        return line.append("\r\n").toString();
    }

    /**
     * Creates a new CSV parser.
     *
     * @param in the underlying CSV data stream
     * @throws NullPointerException if in is null
     */
    public CSVParser(Reader in) {
        this.in = in instanceof BufferedReader ? (BufferedReader)in : new BufferedReader(in);
    }

    /**
     * Creates a new CSV parser.
     *
     * @param in the underlying CSV data stream in UTF-8 encoding
     * @throws NullPointerException if in is null
     */
    public CSVParser(InputStream in) {
        // using the Charset constant means there is no checked exception to swallow
        this.in = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    }

    /**
     * Creates a new CSV parser.
     *
     * @param file the CSV file to parse (read in UTF-8 encoding)
     * @throws FileNotFoundException if the file cannot be found
     * @throws NullPointerException if file is null
     */
    public CSVParser(File file) throws FileNotFoundException {
        this(new FileInputStream(file));
    }

    /**
     * Sets which columns will be returned by the parser and in which order.
     *
     * @param columns the ordered column indices (zero-based) to be returned
     */
    public void setColumns(int... columns) {
        this.columns = columns;
    }

    /**
     * Sets which columns will be returned by the parser and in which order.
     *
     * @param columnNames the ordered column names to be returned. These names
     *        must correspond to the values of the CSV header, which is
     *        the first line of CSV data
     * @throws IOException if the header cannot be read
     * @throws IllegalArgumentException if a given name does not appear in the header
     * @throws IllegalStateException if the header was not read yet and
     *         data has already been read
     */
    public void setColumns(String... columnNames) throws IOException {
        String[] names = getHeader();
        if (names == null) {
            // empty stream: there is no header, so no name can be matched
            names = new String[0];
        }
        int[] indices = new int[columnNames.length];
        for (int i = 0; i < indices.length; i++) {
            int found = -1;
            for (int j = 0; found < 0 && j < names.length; j++) {
                if (names[j].equals(columnNames[i])) {
                    found = j;
                }
            }
            if (found < 0) {
                throw new IllegalArgumentException("column '" + columnNames[i] + "' not found");
            }
            indices[i] = found;
        }
        setColumns(indices);
    }

    /**
     * Filters the given full record, returning only the columns specified
     * by one of the {@link #setColumns} methods in their requested order.
     * If no columns were specified, the record is returned as is.
     *
     * @param record a full record (parsed CSV line)
     * @return the filtered column values
     * @throws ArrayIndexOutOfBoundsException if a requested column index
     *         is outside the bounds of the given record
     */
    protected String[] filterColumns(String[] record) {
        if (columns == null) {
            return record;
        }
        String[] data = new String[columns.length];
        for (int i = 0; i < data.length; i++) {
            data[i] = record[columns[i]];
        }
        return data;
    }

    /**
     * Returns the header values. The header is the first CSV line,
     * and is interpreted as the column names. This method must be
     * called before any data has been read. Further invocations
     * are allowed, and will always return the original header.
     *
     * @return the header values
     * @throws IOException if the header cannot be read
     * @throws IllegalStateException if data has been read before
     *         the first call to this method
     */
    public String[] getHeader() throws IOException {
        if (header == null) {
            ensureNotRead();
            header = nextRecord();
        }
        return header;
    }

    /**
     * Ensures that no data has been read yet.
     *
     * @throws IllegalStateException if data has already been read
     */
    protected void ensureNotRead() throws IllegalStateException {
        if (read) {
            throw new IllegalStateException("data has already been read");
        }
    }

    /**
     * Reads and returns the values of the next record in the CSV stream.
     * <p>
     * A blank line is returned as an empty array, without applying the
     * column filter. A CR character outside of a quoted value is discarded.
     *
     * @return the values of the next record, or null if the stream end has been reached
     * @throws IOException if an error occurs or the content is malformed
     */
    public String[] nextRecord() throws IOException {
        read = true;
        List<String> values = new ArrayList<>();
        StringBuilder sb = new StringBuilder(64);
        State state = State.START;
        for (;;) {
            int c = in.read();
            switch (c) {
                case ',':
                    if (state == State.QUOTE) {
                        sb.append((char)c); // a comma within quotes is part of the value
                    } else {
                        values.add(sb.toString());
                        sb.setLength(0);
                        state = State.START;
                    }
                    break;
                case '"':
                    if (state == State.MIDDLE) {
                        throw new IOException("illegal quote in middle of value");
                    }
                    if (state == State.START) {
                        state = State.QUOTE;
                    } else if (state == State.QUOTE) {
                        state = State.QUOTE2; // either the closing quote or an escaped quote
                    } else if (state == State.QUOTE2) {
                        sb.append((char)c); // two consecutive quotes are an escaped quote
                        state = State.QUOTE;
                    }
                    break;
                case '\r':
                    if (state == State.QUOTE) {
                        sb.append((char)c);
                    }
                    break;
                case '\n':
                    if (state == State.START && values.isEmpty()) {
                        return new String[0]; // blank line
                    }
                    if (state == State.QUOTE) {
                        sb.append((char)c); // a line break within quotes is part of the value
                    } else {
                        values.add(sb.toString());
                        return filterColumns(values.toArray(new String[0]));
                    }
                    break;
                case -1:
                    if (state == State.QUOTE) {
                        throw new IOException("missing end quote");
                    }
                    if (state == State.START && values.isEmpty()) {
                        return null; // nothing was read before the end of the stream
                    }
                    values.add(sb.toString());
                    return filterColumns(values.toArray(new String[0]));
                default:
                    if (state == State.QUOTE2) {
                        throw new IOException("illegal character after quoted value");
                    }
                    sb.append((char)c);
                    if (state == State.START) {
                        state = State.MIDDLE;
                    }
                    break;
            }
        }
    }

    /**
     * Closes the underlying Reader.
     *
     * @throws IOException if the operation fails
     */
    @Override
    public void close() throws IOException {
        in.close();
    }

    /**
     * Returns an iterator over the CSV records (lines). Records are iterated
     * using the {@link #nextRecord} method. Exceptions thrown while iterating
     * are wrapped inside a {@code RuntimeException}. Note that the {@link #close}
     * method must be called when the iterator is no longer needed.
     *
     * @return an iterator over the CSV records (lines)
     */
    @Override
    public Iterator<String[]> iterator() {
        return new Iterator<String[]>() {

            /** The record that was read ahead by hasNext, or null if there is none. */
            private String[] next;

            @Override
            public boolean hasNext() {
                if (next != null) {
                    return true;
                }
                try {
                    next = nextRecord();
                } catch (IOException ioe) {
                    throw new RuntimeException("error reading record", ioe);
                }
                return next != null;
            }

            @Override
            public String[] next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                String[] current = next;
                next = null;
                return current;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * Returns the entire CSV data contents as an ordered map, with
     * the first column as the key and the entire record (including
     * the first column) as the value. Blank lines are skipped, and
     * a record whose key was already seen replaces the earlier one.
     * <p>
     * Note that the {@link #setColumns} methods can be used to
     * filter the output, as well as order the record in such
     * a way that the first column becomes the desired key.
     *
     * @throws IOException if an error occurs
     * @return the entire CSV data contents as an ordered map
     */
    public Map<String, String[]> toMap() throws IOException {
        Map<String, String[]> map = new LinkedHashMap<>();
        String[] record;
        while ((record = nextRecord()) != null) {
            if (record.length > 0) { // a blank line has no first column to use as key
                map.put(record[0], record);
            }
        }
        return map;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy