/*
 * org.jgrapht.ext.CSVImporter
 *
 * (C) Copyright 2016-2016, by Dimitrios Michail and Contributors.
 *
 * JGraphT : a free Java graph-theory library
 *
 * This program and the accompanying materials are dual-licensed under
 * either
 *
 * (a) the terms of the GNU Lesser General Public License version 2.1
 * as published by the Free Software Foundation, or (at your option) any
 * later version.
 *
 * or (per the licensee's choosing)
 *
 * (b) the terms of the Eclipse Public License v1.0 as published by
 * the Eclipse Foundation.
 */
package org.jgrapht.ext;
import java.io.*;
import java.util.*;

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.runtime.tree.*;
import org.jgrapht.*;
* Imports a graph from a CSV Format or any other Delimiter-separated value format.
* The importer supports various different formats which can be adjusted using the
* {@link #setFormat(CSVFormat) setFormat} method. The supported formats are the same CSV formats
* used by Gephi . For some
* of the formats, the behavior of the importer can be adjusted using the
* {@link #setParameter(org.jgrapht.ext.CSVFormat.Parameter, boolean) setParameter} method. See
* {@link CSVFormat} for a description of the formats.
* The importer respects rfc4180. The caller can
* also adjust the separator to something like semicolon or pipe instead of comma. In such a case,
* all fields are unescaped using the new separator. See
* Delimiter- separated
* values for more information.
* This importer does not distinguish between {@link CSVFormat#EDGE_LIST} and
* {@link CSVFormat#ADJACENCY_LIST}. In both cases it assumes the format is
* {@link CSVFormat#ADJACENCY_LIST}.
* @see CSVFormat
* @param the graph vertex type
* @param the graph edge type
* @author Dimitrios Michail
* @since August 2016
public class CSVImporter
implements GraphImporter
private static final char DEFAULT_DELIMITER = ',';
private CSVFormat format;
private VertexProvider vertexProvider;
private EdgeProvider edgeProvider;
private char delimiter;
private final Set parameters;
* Constructs a new importer using the {@link CSVFormat#ADJACENCY_LIST} format as default.
* @param vertexProvider provider for the generation of vertices. Must not be null.
* @param edgeProvider provider for the generation of edges. Must not be null.
public CSVImporter(VertexProvider vertexProvider, EdgeProvider edgeProvider)
this(vertexProvider, edgeProvider, CSVFormat.ADJACENCY_LIST, DEFAULT_DELIMITER);
* Constructs a new importer.
* @param vertexProvider provider for the generation of vertices. Must not be null.
* @param edgeProvider provider for the generation of edges. Must not be null.
* @param format format to use out of the supported ones
public CSVImporter(
VertexProvider vertexProvider, EdgeProvider edgeProvider, CSVFormat format)
this(vertexProvider, edgeProvider, format, DEFAULT_DELIMITER);
* Constructs a new importer.
* @param vertexProvider provider for the generation of vertices. Must not be null.
* @param edgeProvider provider for the generation of edges. Must not be null.
* @param format format to use out of the supported ones
* @param delimiter delimiter to use (comma, semicolon, pipe, etc.)
public CSVImporter(
VertexProvider vertexProvider, EdgeProvider edgeProvider, CSVFormat format,
char delimiter)
if (vertexProvider == null) {
throw new IllegalArgumentException("Vertex provider cannot be null");
this.vertexProvider = vertexProvider;
if (edgeProvider == null) {
throw new IllegalArgumentException("Edge provider cannot be null");
this.edgeProvider = edgeProvider;
this.format = format;
if (!DSVUtils.isValidDelimiter(delimiter)) {
throw new IllegalArgumentException("Character cannot be used as a delimiter");
this.delimiter = delimiter;
this.parameters = new HashSet<>();
* Get the format that the importer is using.
* @return the input format
public CSVFormat getFormat()
return format;
* Set the format of the importer
* @param format the format to use
public void setFormat(CSVFormat format)
this.format = format;
* Get the delimiter (comma, semicolon, pipe, etc).
* @return the delimiter
public char getDelimiter()
return delimiter;
* Set the delimiter (comma, semicolon, pipe, etc).
* @param delimiter the delimiter to use
public void setDelimiter(char delimiter)
if (!DSVUtils.isValidDelimiter(delimiter)) {
throw new IllegalArgumentException("Character cannot be used as a delimiter");
this.delimiter = delimiter;
* Return if a particular parameter of the exporter is enabled
* @param p the parameter
* @return {@code true} if the parameter is set, {@code false} otherwise
public boolean isParameter(CSVFormat.Parameter p)
return parameters.contains(p);
* Set the value of a parameter of the exporter
* @param p the parameter
* @param value the value to set
public void setParameter(CSVFormat.Parameter p, boolean value)
if (value) {
} else {
* Import a graph.
* The provided graph must be able to support the features of the graph that is read. For
* example if the input contains self-loops then the graph provided must also support
* self-loops. The same for multiple edges.
* If the provided graph is a weighted graph, the importer also reads edge weights.
* @param graph the graph
* @param input the input reader
* @throws ImportException in case an error occurs, such as I/O or parse error
public void importGraph(Graph graph, Reader input)
throws ImportException
switch (format) {
read(graph, input, new AdjacencyListCSVListener(graph));
case MATRIX:
read(graph, input, new MatrixCSVListener(graph));
private void read(Graph graph, Reader input, CSVBaseListener listener)
throws ImportException
try {
ThrowingErrorListener errorListener = new ThrowingErrorListener();
// create lexer
CSVLexer lexer = new CSVLexer(new ANTLRInputStream(input));
// create parser
CSVParser parser = new CSVParser(new CommonTokenStream(lexer));
// Specify our entry point
CSVParser.FileContext graphContext = parser.file();
// Walk it and attach our listener
ParseTreeWalker walker = new ParseTreeWalker();
walker.walk(listener, graphContext);
} catch (IOException e) {
throw new ImportException("Failed to import CSV graph: " + e.getMessage(), e);
} catch (ParseCancellationException pe) {
throw new ImportException("Failed to import CSV graph: " + pe.getMessage(), pe);
} catch (IllegalArgumentException iae) {
throw new ImportException("Failed to import CSV graph: " + iae.getMessage(), iae);
private class ThrowingErrorListener
extends BaseErrorListener
public void syntaxError(
Recognizer, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
String msg, RecognitionException e)
throws ParseCancellationException
throw new ParseCancellationException(
"line " + line + ":" + charPositionInLine + " " + msg);
// listener for the edge list format
private class AdjacencyListCSVListener
extends RowCSVListener
public AdjacencyListCSVListener(Graph graph)
protected void handleRow()
// first is source
String sourceKey = row.get(0);
if (sourceKey.isEmpty()) {
throw new ParseCancellationException("Source vertex cannot be empty");
V source = vertices.get(sourceKey);
if (source == null) {
source = vertexProvider.buildVertex(sourceKey, new HashMap<>());
vertices.put(sourceKey, source);
// remaining are targets
for (String key : row) {
if (key.isEmpty()) {
throw new ParseCancellationException("Target vertex cannot be empty");
V target = vertices.get(key);
if (target == null) {
target = vertexProvider.buildVertex(key, new HashMap<>());
vertices.put(key, target);
try {
String label = "e_" + source + "_" + target;
E e = edgeProvider
.buildEdge(source, target, label, new HashMap());
graph.addEdge(source, target, e);
} catch (IllegalArgumentException e) {
throw new ParseCancellationException(
"Provided graph does not support input: " + e.getMessage(), e);
// listener for the edge list format
private class MatrixCSVListener
extends RowCSVListener
private boolean assumeNodeIds;
private boolean assumeEdgeWeights;
private boolean assumeZeroWhenNoEdge;
private int verticesCount;
private int currentVertex;
private String currentVertexName;
private Map columnIndex;
public MatrixCSVListener(Graph graph)
this.assumeNodeIds = parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_NODEID);
this.assumeEdgeWeights =
this.assumeZeroWhenNoEdge =
this.verticesCount = 0;
this.currentVertex = 1;
this.currentVertexName = null;
this.columnIndex = new HashMap<>();
protected void handleRow()
if (assumeNodeIds) {
if (!header) {
currentVertexName = row.get(0);
} else {
currentVertexName = String.valueOf(currentVertex);
if (header) {
if (assumeNodeIds) {
} else {
} else {
private void createVerticesFromNodeIds()
// header line contains nodes
verticesCount = row.size();
if (verticesCount < 1) {
throw new ParseCancellationException("Failed to parse header with vertices");
int v = 1;
for (String vertexName : row) {
if (vertexName.trim().isEmpty()) {
throw new ParseCancellationException(
"Failed to parse header with vertices (empty name)");
V vertex = vertexProvider.buildVertex(vertexName, new HashMap<>());
vertices.put(vertexName, vertex);
columnIndex.put(v, vertexName);
private void createVertices()
// header line contains nodes
verticesCount = row.size();
if (verticesCount < 1) {
throw new ParseCancellationException("Failed to parse header with vertices");
int v = 1;
for (v = 1; v <= verticesCount; v++) {
String vertexName = String.valueOf(v);
V vertex = vertexProvider.buildVertex(vertexName, new HashMap<>());
vertices.put(vertexName, vertex);
columnIndex.put(v, vertexName);
private void createEdges()
if (row.size() != verticesCount) {
throw new ParseCancellationException(
"Row contains fewer than " + verticesCount + " entries");
int target = 1;
for (String entry : row) {
// try to parse an integer
try {
Integer entryAsInteger = Integer.parseInt(entry);
if (entryAsInteger == 0) {
if (!assumeZeroWhenNoEdge && assumeEdgeWeights) {
createEdge(currentVertexName, columnIndex.get(target), 0d);
} else {
if (assumeEdgeWeights) {
currentVertexName, columnIndex.get(target),
} else {
createEdge(currentVertexName, columnIndex.get(target), null);
} catch (NumberFormatException nfe) {
// nothing
// try to parse a double
try {
Double entryAsDouble = Double.parseDouble(entry);
if (assumeEdgeWeights) {
createEdge(currentVertexName, columnIndex.get(target), entryAsDouble);
} else {
throw new ParseCancellationException(
"Double entry found when expecting no weights");
} catch (NumberFormatException nfe) {
// nothing
private void createEdge(String sourceName, String targetName, Double weight)
try {
V source = vertices.get(sourceName);
V target = vertices.get(targetName);
String label = "e_" + source + "_" + target;
E e = edgeProvider.buildEdge(source, target, label, new HashMap());
graph.addEdge(source, target, e);
if (weight != null) {
if (graph instanceof WeightedGraph, ?>) {
((WeightedGraph) graph).setEdgeWeight(e, weight);
} catch (IllegalArgumentException e) {
throw new ParseCancellationException(
"Provided graph does not support input: " + e.getMessage(), e);
// base listener
private abstract class RowCSVListener
extends CSVBaseListener
protected Graph graph;
protected List row;
protected Map vertices;
protected boolean header;
public RowCSVListener(Graph graph)
this.graph = graph;
this.row = new ArrayList<>();
this.vertices = new HashMap<>();
this.header = false;
public void enterHeader(CSVParser.HeaderContext ctx)
header = true;
public void exitHeader(CSVParser.HeaderContext ctx)
header = false;
public void enterRecord(CSVParser.RecordContext ctx)
public void exitRecord(CSVParser.RecordContext ctx)
if (row.isEmpty()) {
throw new ParseCancellationException("Empty CSV record");
public void exitTextField(CSVParser.TextFieldContext ctx)
public void exitStringField(CSVParser.StringFieldContext ctx)
row.add(DSVUtils.unescapeDSV(ctx.STRING().getText(), delimiter));
public void exitEmptyField(CSVParser.EmptyFieldContext ctx)
protected abstract void handleRow();
// End