org.jgrapht.ext.CSVImporter Maven / Gradle / Ivy
/*
* (C) Copyright 2016-2016, by Dimitrios Michail and Contributors.
*
* JGraphT : a free Java graph-theory library
*
* This program and the accompanying materials are dual-licensed under
* either
*
* (a) the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation, or (at your option) any
* later version.
*
* or (per the licensee's choosing)
*
* (b) the terms of the Eclipse Public License v1.0 as published by
* the Eclipse Foundation.
*/
package org.jgrapht.ext;
import java.io.*;
import java.util.*;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.runtime.tree.*;
import org.jgrapht.*;
/**
* Imports a graph from a CSV Format or any other Delimiter-separated value format.
*
*
* The importer supports various different formats which can be adjusted using the
* {@link #setFormat(CSVFormat) setFormat} method. The supported formats are the same CSV formats
* used by Gephi . For some
* of the formats, the behavior of the importer can be adjusted using the
* {@link #setParameter(org.jgrapht.ext.CSVFormat.Parameter, boolean) setParameter} method. See
* {@link CSVFormat} for a description of the formats.
*
*
*
* The importer respects rfc4180. The caller can
* also adjust the separator to something like semicolon or pipe instead of comma. In such a case,
* all fields are unescaped using the new separator. See
* Delimiter- separated
* values for more information.
*
*
*
* This importer does not distinguish between {@link CSVFormat#EDGE_LIST} and
* {@link CSVFormat#ADJACENCY_LIST}. In both cases it assumes the format is
* {@link CSVFormat#ADJACENCY_LIST}.
*
*
* @see CSVFormat
*
* @param the graph vertex type
* @param the graph edge type
*
* @author Dimitrios Michail
* @since August 2016
*/
public class CSVImporter
implements GraphImporter
{
private static final char DEFAULT_DELIMITER = ',';
private CSVFormat format;
private VertexProvider vertexProvider;
private EdgeProvider edgeProvider;
private char delimiter;
private final Set parameters;
/**
* Constructs a new importer using the {@link CSVFormat#ADJACENCY_LIST} format as default.
*
* @param vertexProvider provider for the generation of vertices. Must not be null.
* @param edgeProvider provider for the generation of edges. Must not be null.
*/
public CSVImporter(VertexProvider vertexProvider, EdgeProvider edgeProvider)
{
this(vertexProvider, edgeProvider, CSVFormat.ADJACENCY_LIST, DEFAULT_DELIMITER);
}
/**
* Constructs a new importer.
*
* @param vertexProvider provider for the generation of vertices. Must not be null.
* @param edgeProvider provider for the generation of edges. Must not be null.
* @param format format to use out of the supported ones
*/
public CSVImporter(
VertexProvider vertexProvider, EdgeProvider edgeProvider, CSVFormat format)
{
this(vertexProvider, edgeProvider, format, DEFAULT_DELIMITER);
}
/**
* Constructs a new importer.
*
* @param vertexProvider provider for the generation of vertices. Must not be null.
* @param edgeProvider provider for the generation of edges. Must not be null.
* @param format format to use out of the supported ones
* @param delimiter delimiter to use (comma, semicolon, pipe, etc.)
*/
public CSVImporter(
VertexProvider vertexProvider, EdgeProvider edgeProvider, CSVFormat format,
char delimiter)
{
if (vertexProvider == null) {
throw new IllegalArgumentException("Vertex provider cannot be null");
}
this.vertexProvider = vertexProvider;
if (edgeProvider == null) {
throw new IllegalArgumentException("Edge provider cannot be null");
}
this.edgeProvider = edgeProvider;
this.format = format;
if (!DSVUtils.isValidDelimiter(delimiter)) {
throw new IllegalArgumentException("Character cannot be used as a delimiter");
}
this.delimiter = delimiter;
this.parameters = new HashSet<>();
}
/**
* Get the format that the importer is using.
*
* @return the input format
*/
public CSVFormat getFormat()
{
return format;
}
/**
* Set the format of the importer
*
* @param format the format to use
*/
public void setFormat(CSVFormat format)
{
this.format = format;
}
/**
* Get the delimiter (comma, semicolon, pipe, etc).
*
* @return the delimiter
*/
public char getDelimiter()
{
return delimiter;
}
/**
* Set the delimiter (comma, semicolon, pipe, etc).
*
* @param delimiter the delimiter to use
*/
public void setDelimiter(char delimiter)
{
if (!DSVUtils.isValidDelimiter(delimiter)) {
throw new IllegalArgumentException("Character cannot be used as a delimiter");
}
this.delimiter = delimiter;
}
/**
* Return if a particular parameter of the exporter is enabled
*
* @param p the parameter
* @return {@code true} if the parameter is set, {@code false} otherwise
*/
public boolean isParameter(CSVFormat.Parameter p)
{
return parameters.contains(p);
}
/**
* Set the value of a parameter of the exporter
*
* @param p the parameter
* @param value the value to set
*/
public void setParameter(CSVFormat.Parameter p, boolean value)
{
if (value) {
parameters.add(p);
} else {
parameters.remove(p);
}
}
/**
* Import a graph.
*
*
* The provided graph must be able to support the features of the graph that is read. For
* example if the input contains self-loops then the graph provided must also support
* self-loops. The same for multiple edges.
*
*
* If the provided graph is a weighted graph, the importer also reads edge weights.
*
* @param graph the graph
* @param input the input reader
* @throws ImportException in case an error occurs, such as I/O or parse error
*/
@Override
public void importGraph(Graph graph, Reader input)
throws ImportException
{
switch (format) {
case EDGE_LIST:
case ADJACENCY_LIST:
read(graph, input, new AdjacencyListCSVListener(graph));
break;
case MATRIX:
read(graph, input, new MatrixCSVListener(graph));
break;
}
}
private void read(Graph graph, Reader input, CSVBaseListener listener)
throws ImportException
{
try {
ThrowingErrorListener errorListener = new ThrowingErrorListener();
// create lexer
CSVLexer lexer = new CSVLexer(new ANTLRInputStream(input));
lexer.setSep(delimiter);
lexer.removeErrorListeners();
lexer.addErrorListener(errorListener);
// create parser
CSVParser parser = new CSVParser(new CommonTokenStream(lexer));
parser.removeErrorListeners();
parser.addErrorListener(errorListener);
// Specify our entry point
CSVParser.FileContext graphContext = parser.file();
// Walk it and attach our listener
ParseTreeWalker walker = new ParseTreeWalker();
walker.walk(listener, graphContext);
} catch (IOException e) {
throw new ImportException("Failed to import CSV graph: " + e.getMessage(), e);
} catch (ParseCancellationException pe) {
throw new ImportException("Failed to import CSV graph: " + pe.getMessage(), pe);
} catch (IllegalArgumentException iae) {
throw new ImportException("Failed to import CSV graph: " + iae.getMessage(), iae);
}
}
private class ThrowingErrorListener
extends BaseErrorListener
{
@Override
public void syntaxError(
Recognizer, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
String msg, RecognitionException e)
throws ParseCancellationException
{
throw new ParseCancellationException(
"line " + line + ":" + charPositionInLine + " " + msg);
}
}
// listener for the edge list format
private class AdjacencyListCSVListener
extends RowCSVListener
{
public AdjacencyListCSVListener(Graph graph)
{
super(graph);
}
@Override
protected void handleRow()
{
// first is source
String sourceKey = row.get(0);
if (sourceKey.isEmpty()) {
throw new ParseCancellationException("Source vertex cannot be empty");
}
V source = vertices.get(sourceKey);
if (source == null) {
source = vertexProvider.buildVertex(sourceKey, new HashMap<>());
vertices.put(sourceKey, source);
graph.addVertex(source);
}
row.remove(0);
// remaining are targets
for (String key : row) {
if (key.isEmpty()) {
throw new ParseCancellationException("Target vertex cannot be empty");
}
V target = vertices.get(key);
if (target == null) {
target = vertexProvider.buildVertex(key, new HashMap<>());
vertices.put(key, target);
graph.addVertex(target);
}
try {
String label = "e_" + source + "_" + target;
E e = edgeProvider
.buildEdge(source, target, label, new HashMap());
graph.addEdge(source, target, e);
} catch (IllegalArgumentException e) {
throw new ParseCancellationException(
"Provided graph does not support input: " + e.getMessage(), e);
}
}
}
}
// listener for the edge list format
private class MatrixCSVListener
extends RowCSVListener
{
private boolean assumeNodeIds;
private boolean assumeEdgeWeights;
private boolean assumeZeroWhenNoEdge;
private int verticesCount;
private int currentVertex;
private String currentVertexName;
private Map columnIndex;
public MatrixCSVListener(Graph graph)
{
super(graph);
this.assumeNodeIds = parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_NODEID);
this.assumeEdgeWeights =
parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_EDGE_WEIGHTS);
this.assumeZeroWhenNoEdge =
parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_ZERO_WHEN_NO_EDGE);
this.verticesCount = 0;
this.currentVertex = 1;
this.currentVertexName = null;
this.columnIndex = new HashMap<>();
}
@Override
protected void handleRow()
{
if (assumeNodeIds) {
if (!header) {
currentVertexName = row.get(0);
}
row.remove(0);
} else {
currentVertexName = String.valueOf(currentVertex);
}
if (header) {
if (assumeNodeIds) {
createVerticesFromNodeIds();
} else {
createVertices();
createEdges();
currentVertex++;
}
} else {
createEdges();
currentVertex++;
}
}
private void createVerticesFromNodeIds()
{
// header line contains nodes
verticesCount = row.size();
if (verticesCount < 1) {
throw new ParseCancellationException("Failed to parse header with vertices");
}
int v = 1;
for (String vertexName : row) {
if (vertexName.trim().isEmpty()) {
throw new ParseCancellationException(
"Failed to parse header with vertices (empty name)");
}
V vertex = vertexProvider.buildVertex(vertexName, new HashMap<>());
vertices.put(vertexName, vertex);
graph.addVertex(vertex);
columnIndex.put(v, vertexName);
v++;
}
}
private void createVertices()
{
// header line contains nodes
verticesCount = row.size();
if (verticesCount < 1) {
throw new ParseCancellationException("Failed to parse header with vertices");
}
int v = 1;
for (v = 1; v <= verticesCount; v++) {
String vertexName = String.valueOf(v);
V vertex = vertexProvider.buildVertex(vertexName, new HashMap<>());
vertices.put(vertexName, vertex);
graph.addVertex(vertex);
columnIndex.put(v, vertexName);
}
}
private void createEdges()
{
if (row.size() != verticesCount) {
throw new ParseCancellationException(
"Row contains fewer than " + verticesCount + " entries");
}
int target = 1;
for (String entry : row) {
// try to parse an integer
try {
Integer entryAsInteger = Integer.parseInt(entry);
if (entryAsInteger == 0) {
if (!assumeZeroWhenNoEdge && assumeEdgeWeights) {
createEdge(currentVertexName, columnIndex.get(target), 0d);
}
} else {
if (assumeEdgeWeights) {
createEdge(
currentVertexName, columnIndex.get(target),
Double.valueOf(entryAsInteger));
} else {
createEdge(currentVertexName, columnIndex.get(target), null);
}
}
target++;
continue;
} catch (NumberFormatException nfe) {
// nothing
}
// try to parse a double
try {
Double entryAsDouble = Double.parseDouble(entry);
if (assumeEdgeWeights) {
createEdge(currentVertexName, columnIndex.get(target), entryAsDouble);
} else {
throw new ParseCancellationException(
"Double entry found when expecting no weights");
}
} catch (NumberFormatException nfe) {
// nothing
}
target++;
}
}
private void createEdge(String sourceName, String targetName, Double weight)
{
try {
V source = vertices.get(sourceName);
V target = vertices.get(targetName);
String label = "e_" + source + "_" + target;
E e = edgeProvider.buildEdge(source, target, label, new HashMap());
graph.addEdge(source, target, e);
if (weight != null) {
if (graph instanceof WeightedGraph, ?>) {
((WeightedGraph) graph).setEdgeWeight(e, weight);
}
}
} catch (IllegalArgumentException e) {
throw new ParseCancellationException(
"Provided graph does not support input: " + e.getMessage(), e);
}
}
}
// base listener
private abstract class RowCSVListener
extends CSVBaseListener
{
protected Graph graph;
protected List row;
protected Map vertices;
protected boolean header;
public RowCSVListener(Graph graph)
{
this.graph = graph;
this.row = new ArrayList<>();
this.vertices = new HashMap<>();
this.header = false;
}
@Override
public void enterHeader(CSVParser.HeaderContext ctx)
{
header = true;
}
@Override
public void exitHeader(CSVParser.HeaderContext ctx)
{
header = false;
}
@Override
public void enterRecord(CSVParser.RecordContext ctx)
{
row.clear();
}
@Override
public void exitRecord(CSVParser.RecordContext ctx)
{
if (row.isEmpty()) {
throw new ParseCancellationException("Empty CSV record");
}
handleRow();
}
@Override
public void exitTextField(CSVParser.TextFieldContext ctx)
{
row.add(ctx.TEXT().getText());
}
@Override
public void exitStringField(CSVParser.StringFieldContext ctx)
{
row.add(DSVUtils.unescapeDSV(ctx.STRING().getText(), delimiter));
}
@Override
public void exitEmptyField(CSVParser.EmptyFieldContext ctx)
{
row.add("");
}
protected abstract void handleRow();
}
}
// End CSVImporter.java