All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.salesforce.jgrapht.ext.CSVImporter Maven / Gradle / Ivy

Go to download

This project contains the apt processor that implements all the checks enumerated in @Verify. It is a self contained, and shaded jar.

There is a newer version: 2.0.7
Show newest version
/*
 * (C) Copyright 2016-2017, by Dimitrios Michail and Contributors.
 *
 * JGraphT : a free Java graph-theory library
 *
 * This program and the accompanying materials are dual-licensed under
 * either
 *
 * (a) the terms of the GNU Lesser General Public License version 2.1
 * as published by the Free Software Foundation, or (at your option) any
 * later version.
 *
 * or (per the licensee's choosing)
 *
 * (b) the terms of the Eclipse Public License v1.0 as published by
 * the Eclipse Foundation.
 */
package com.salesforce.jgrapht.ext;

import java.io.*;
import java.util.*;

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.runtime.tree.*;
import com.salesforce.jgrapht.*;

/**
 * Imports a graph from a CSV Format or any other Delimiter-separated value format.
 * 
 * 

* The importer supports various different formats which can be adjusted using the * {@link #setFormat(CSVFormat) setFormat} method. The supported formats are the same CSV formats * used by Gephi . For some * of the formats, the behavior of the importer can be adjusted using the * {@link #setParameter(com.salesforce.jgrapht.ext.CSVFormat.Parameter, boolean) setParameter} method. See * {@link CSVFormat} for a description of the formats. *

* *

* The importer respects rfc4180. The caller can * also adjust the separator to something like semicolon or pipe instead of comma. In such a case, * all fields are unescaped using the new separator. See * Delimiter- separated * values for more information. *

* *

* This importer does not distinguish between {@link CSVFormat#EDGE_LIST} and * {@link CSVFormat#ADJACENCY_LIST}. In both cases it assumes the format is * {@link CSVFormat#ADJACENCY_LIST}. *

* * @see CSVFormat * * @param the graph vertex type * @param the graph edge type * * @author Dimitrios Michail * @since August 2016 */ public class CSVImporter implements GraphImporter { private static final char DEFAULT_DELIMITER = ','; private CSVFormat format; private VertexProvider vertexProvider; private EdgeProvider edgeProvider; private char delimiter; private final Set parameters; /** * Constructs a new importer using the {@link CSVFormat#ADJACENCY_LIST} format as default. * * @param vertexProvider provider for the generation of vertices. Must not be null. * @param edgeProvider provider for the generation of edges. Must not be null. */ public CSVImporter(VertexProvider vertexProvider, EdgeProvider edgeProvider) { this(vertexProvider, edgeProvider, CSVFormat.ADJACENCY_LIST, DEFAULT_DELIMITER); } /** * Constructs a new importer. * * @param vertexProvider provider for the generation of vertices. Must not be null. * @param edgeProvider provider for the generation of edges. Must not be null. * @param format format to use out of the supported ones */ public CSVImporter( VertexProvider vertexProvider, EdgeProvider edgeProvider, CSVFormat format) { this(vertexProvider, edgeProvider, format, DEFAULT_DELIMITER); } /** * Constructs a new importer. * * @param vertexProvider provider for the generation of vertices. Must not be null. * @param edgeProvider provider for the generation of edges. Must not be null. * @param format format to use out of the supported ones * @param delimiter delimiter to use (comma, semicolon, pipe, etc.) */ public CSVImporter( VertexProvider vertexProvider, EdgeProvider edgeProvider, CSVFormat format, char delimiter) { if (vertexProvider == null) { throw new IllegalArgumentException("Vertex provider cannot be null"); } this.vertexProvider = vertexProvider; if (edgeProvider == null) { throw new IllegalArgumentException("Edge provider cannot be null"); } this.edgeProvider = edgeProvider; this.format = format; if (!DSVUtils.isValidDelimiter(delimiter)) { throw new IllegalArgumentException("Character cannot be used as a delimiter"); } this.delimiter = delimiter; this.parameters = new HashSet<>(); } /** * Get the format that the importer is using. * * @return the input format */ public CSVFormat getFormat() { return format; } /** * Set the format of the importer * * @param format the format to use */ public void setFormat(CSVFormat format) { this.format = format; } /** * Get the delimiter (comma, semicolon, pipe, etc). * * @return the delimiter */ public char getDelimiter() { return delimiter; } /** * Set the delimiter (comma, semicolon, pipe, etc). * * @param delimiter the delimiter to use */ public void setDelimiter(char delimiter) { if (!DSVUtils.isValidDelimiter(delimiter)) { throw new IllegalArgumentException("Character cannot be used as a delimiter"); } this.delimiter = delimiter; } /** * Return if a particular parameter of the exporter is enabled * * @param p the parameter * @return {@code true} if the parameter is set, {@code false} otherwise */ public boolean isParameter(CSVFormat.Parameter p) { return parameters.contains(p); } /** * Set the value of a parameter of the exporter * * @param p the parameter * @param value the value to set */ public void setParameter(CSVFormat.Parameter p, boolean value) { if (value) { parameters.add(p); } else { parameters.remove(p); } } /** * Import a graph. * *

* The provided graph must be able to support the features of the graph that is read. For * example if the input contains self-loops then the graph provided must also support * self-loops. The same for multiple edges. * *

* If the provided graph is a weighted graph, the importer also reads edge weights. * * @param graph the graph * @param input the input reader * @throws ImportException in case an error occurs, such as I/O or parse error */ @Override public void importGraph(Graph graph, Reader input) throws ImportException { switch (format) { case EDGE_LIST: case ADJACENCY_LIST: read(graph, input, new AdjacencyListCSVListener(graph)); break; case MATRIX: read(graph, input, new MatrixCSVListener(graph)); break; } } private void read(Graph graph, Reader input, CSVBaseListener listener) throws ImportException { try { ThrowingErrorListener errorListener = new ThrowingErrorListener(); // create lexer CSVLexer lexer = new CSVLexer(new ANTLRInputStream(input)); lexer.setSep(delimiter); lexer.removeErrorListeners(); lexer.addErrorListener(errorListener); // create parser CSVParser parser = new CSVParser(new CommonTokenStream(lexer)); parser.removeErrorListeners(); parser.addErrorListener(errorListener); // Specify our entry point CSVParser.FileContext graphContext = parser.file(); // Walk it and attach our listener ParseTreeWalker walker = new ParseTreeWalker(); walker.walk(listener, graphContext); } catch (IOException e) { throw new ImportException("Failed to import CSV graph: " + e.getMessage(), e); } catch (ParseCancellationException pe) { throw new ImportException("Failed to import CSV graph: " + pe.getMessage(), pe); } catch (IllegalArgumentException iae) { throw new ImportException("Failed to import CSV graph: " + iae.getMessage(), iae); } } private class ThrowingErrorListener extends BaseErrorListener { @Override public void syntaxError( Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) throws ParseCancellationException { throw new ParseCancellationException( "line " + line + ":" + charPositionInLine + " " + msg); } } // listener for the edge list format private class AdjacencyListCSVListener extends RowCSVListener { public AdjacencyListCSVListener(Graph graph) { super(graph); } @Override protected void handleRow() { // first is source String sourceKey = row.get(0); if (sourceKey.isEmpty()) { throw new ParseCancellationException("Source vertex cannot be empty"); } V source = vertices.get(sourceKey); if (source == null) { source = vertexProvider.buildVertex(sourceKey, new HashMap<>()); vertices.put(sourceKey, source); graph.addVertex(source); } row.remove(0); // remaining are targets for (String key : row) { if (key.isEmpty()) { throw new ParseCancellationException("Target vertex cannot be empty"); } V target = vertices.get(key); if (target == null) { target = vertexProvider.buildVertex(key, new HashMap<>()); vertices.put(key, target); graph.addVertex(target); } try { String label = "e_" + source + "_" + target; E e = edgeProvider .buildEdge(source, target, label, new HashMap()); graph.addEdge(source, target, e); } catch (IllegalArgumentException e) { throw new ParseCancellationException( "Provided graph does not support input: " + e.getMessage(), e); } } } } // listener for the edge list format private class MatrixCSVListener extends RowCSVListener { private boolean assumeNodeIds; private boolean assumeEdgeWeights; private boolean assumeZeroWhenNoEdge; private int verticesCount; private int currentVertex; private String currentVertexName; private Map columnIndex; public MatrixCSVListener(Graph graph) { super(graph); this.assumeNodeIds = parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_NODEID); this.assumeEdgeWeights = parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_EDGE_WEIGHTS); this.assumeZeroWhenNoEdge = parameters.contains(CSVFormat.Parameter.MATRIX_FORMAT_ZERO_WHEN_NO_EDGE); this.verticesCount = 0; this.currentVertex = 1; this.currentVertexName = null; this.columnIndex = new HashMap<>(); } @Override protected void handleRow() { if (assumeNodeIds) { if (!header) { currentVertexName = row.get(0); } row.remove(0); } else { currentVertexName = String.valueOf(currentVertex); } if (header) { if (assumeNodeIds) { createVerticesFromNodeIds(); } else { createVertices(); createEdges(); currentVertex++; } } else { createEdges(); currentVertex++; } } private void createVerticesFromNodeIds() { // header line contains nodes verticesCount = row.size(); if (verticesCount < 1) { throw new ParseCancellationException("Failed to parse header with vertices"); } int v = 1; for (String vertexName : row) { if (vertexName.trim().isEmpty()) { throw new ParseCancellationException( "Failed to parse header with vertices (empty name)"); } V vertex = vertexProvider.buildVertex(vertexName, new HashMap<>()); vertices.put(vertexName, vertex); graph.addVertex(vertex); columnIndex.put(v, vertexName); v++; } } private void createVertices() { // header line contains nodes verticesCount = row.size(); if (verticesCount < 1) { throw new ParseCancellationException("Failed to parse header with vertices"); } int v = 1; for (v = 1; v <= verticesCount; v++) { String vertexName = String.valueOf(v); V vertex = vertexProvider.buildVertex(vertexName, new HashMap<>()); vertices.put(vertexName, vertex); graph.addVertex(vertex); columnIndex.put(v, vertexName); } } private void createEdges() { if (row.size() != verticesCount) { throw new ParseCancellationException( "Row contains fewer than " + verticesCount + " entries"); } int target = 1; for (String entry : row) { // try to parse an integer try { Integer entryAsInteger = Integer.parseInt(entry); if (entryAsInteger == 0) { if (!assumeZeroWhenNoEdge && assumeEdgeWeights) { createEdge(currentVertexName, columnIndex.get(target), 0d); } } else { if (assumeEdgeWeights) { createEdge( currentVertexName, columnIndex.get(target), Double.valueOf(entryAsInteger)); } else { createEdge(currentVertexName, columnIndex.get(target), null); } } target++; continue; } catch (NumberFormatException nfe) { // nothing } // try to parse a double try { Double entryAsDouble = Double.parseDouble(entry); if (assumeEdgeWeights) { createEdge(currentVertexName, columnIndex.get(target), entryAsDouble); } else { throw new ParseCancellationException( "Double entry found when expecting no weights"); } } catch (NumberFormatException nfe) { // nothing } target++; } } private void createEdge(String sourceName, String targetName, Double weight) { try { V source = vertices.get(sourceName); V target = vertices.get(targetName); String label = "e_" + source + "_" + target; E e = edgeProvider.buildEdge(source, target, label, new HashMap()); graph.addEdge(source, target, e); if (weight != null) { if (graph instanceof WeightedGraph) { ((WeightedGraph) graph).setEdgeWeight(e, weight); } } } catch (IllegalArgumentException e) { throw new ParseCancellationException( "Provided graph does not support input: " + e.getMessage(), e); } } } // base listener private abstract class RowCSVListener extends CSVBaseListener { protected Graph graph; protected List row; protected Map vertices; protected boolean header; public RowCSVListener(Graph graph) { this.graph = graph; this.row = new ArrayList<>(); this.vertices = new HashMap<>(); this.header = false; } @Override public void enterHeader(CSVParser.HeaderContext ctx) { header = true; } @Override public void exitHeader(CSVParser.HeaderContext ctx) { header = false; } @Override public void enterRecord(CSVParser.RecordContext ctx) { row.clear(); } @Override public void exitRecord(CSVParser.RecordContext ctx) { if (row.isEmpty()) { throw new ParseCancellationException("Empty CSV record"); } handleRow(); } @Override public void exitTextField(CSVParser.TextFieldContext ctx) { row.add(ctx.TEXT().getText()); } @Override public void exitStringField(CSVParser.StringFieldContext ctx) { row.add(DSVUtils.unescapeDSV(ctx.STRING().getText(), delimiter)); } @Override public void exitEmptyField(CSVParser.EmptyFieldContext ctx) { row.add(""); } protected abstract void handleRow(); } } // End CSVImporter.java





© 2015 - 2025 Weber Informatics LLC | Privacy Policy