
// org.apache.flink.examples.java.graph.EnumTrianglesOpt (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.examples.java.graph;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.examples.java.graph.util.EnumTrianglesData;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.Edge;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.EdgeWithDegrees;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.Triad;
/**
* Triangle enumeration is a pre-processing step to find closely connected parts in graphs.
* A triangle consists of three edges that connect three vertices with each other.
*
*
* The basic algorithm works as follows:
* It groups all edges that share a common vertex and builds triads, i.e., triples of vertices
* that are connected by two edges. Finally, all triads are filtered for which no third edge exists
* that closes the triangle.
*
*
* For a group of n edges that share a common vertex, the number of built triads is quadratic ((n*(n-1))/2).
* Therefore, an optimization of the algorithm is to group edges on the vertex with the smaller output degree to
* reduce the number of triads.
* This implementation extends the basic algorithm by computing output degrees of edge vertices and
* grouping on edges on the vertex with the smaller degree.
*
*
* Input files are plain text files and must be formatted as follows:
*
* - Edges are represented as pairs of vertex IDs which are separated by space
* characters. Edges are separated by new-line characters.
* For example <code>"1 2\n2 12\n1 12\n42 63"</code>
* gives four (undirected) edges (1)-(2), (2)-(12), (1)-(12), and (42)-(63)
* that include a triangle:
*
*
* <pre>
*     (1)
*     /  \
*   (2)-(12)
* </pre>
*
*
* Usage: <code>EnumTrianglesOpt &lt;edge path&gt; &lt;result path&gt;</code>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
*
*
* This example shows how to use:
*
* - Custom Java objects which extend Tuple
* - Group Sorting
*
*
*/
@SuppressWarnings("serial")
public class EnumTrianglesOpt {
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Builds and runs the triangle enumeration program.
 *
 * <p>The program annotates every edge with the degrees of both of its vertices, orients each
 * edge so that it starts at the vertex with the smaller degree, builds triads from edges that
 * share their first vertex, and keeps only those triads for which a closing edge exists.
 *
 * @param args either empty (built-in default data is used and the result is printed) or
 *             exactly two entries: the edge input path and the result output path
 * @throws Exception propagated from the Flink API calls
 */
public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
        return;
    }

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read input data
    DataSet<Edge> edges = getEdgeDataSet(env);

    // annotate edges with degrees
    DataSet<EdgeWithDegrees> edgesWithDegrees = edges
            .flatMap(new EdgeDuplicator())
            .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new DegreeCounter())
            .groupBy(EdgeWithDegrees.V1, EdgeWithDegrees.V2).reduce(new DegreeJoiner());

    // project edges by degrees
    DataSet<Edge> edgesByDegree = edgesWithDegrees
            .map(new EdgeByDegreeProjector());

    // project edges by vertex id
    DataSet<Edge> edgesById = edgesByDegree
            .map(new EdgeByIdProjector());

    DataSet<Triad> triangles = edgesByDegree
            // build triads
            .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
            // filter triads: only triads whose (second, third) vertex pair matches an edge survive the join
            .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if (fileOutput) {
        triangles.writeAsCsv(outputPath, "\n", ",");
        // execute program
        env.execute("Triangle Enumeration Example");
    } else {
        triangles.print();
    }
}
// *************************************************************************
// USER FUNCTIONS
// *************************************************************************
/** Converts a Tuple2 into an Edge */
@ForwardedFields("0;1")
public static class TupleEdgeConverter implements MapFunction, Edge> {
private final Edge outEdge = new Edge();
@Override
public Edge map(Tuple2 t) throws Exception {
outEdge.copyVerticesFromTuple2(t);
return outEdge;
}
}
/**
 * Emits for an edge the original edge and its switched version.
 *
 * <p>The input edge is flipped in place and emitted a second time, so no additional
 * {@link Edge} object is created.
 */
private static class EdgeDuplicator implements FlatMapFunction<Edge, Edge> {

    /**
     * @param edge the edge to duplicate; its vertices are flipped by this call
     * @param out collector receiving the edge in both orientations
     */
    @Override
    public void flatMap(Edge edge, Collector<Edge> out) throws Exception {
        out.collect(edge);
        edge.flipVertices();
        out.collect(edge);
    }
}
/**
 * Counts the number of edges that share a common vertex.
 * Emits one edge for each distinct neighbor of the shared vertex, with a degree annotation for
 * the shared vertex only; the degree of the other vertex is left at 0.
 * For each emitted edge, the first vertex is the vertex with the smaller id.
 *
 * <p>Self-loops (edges whose two vertices are identical) are ignored for every edge of the
 * group, including the first one, so they neither contribute to the degree nor get emitted.
 */
private static class DegreeCounter implements GroupReduceFunction<Edge, EdgeWithDegrees> {

    // Both objects are reused across groups to avoid per-group allocations.
    private final List<Integer> otherVertices = new ArrayList<Integer>();
    private final EdgeWithDegrees outputEdge = new EdgeWithDegrees();

    /**
     * @param edgesIter all edges of one group, i.e. edges that share their first vertex
     * @param out collector receiving one degree-annotated edge per distinct neighbor
     */
    @Override
    public void reduce(Iterable<Edge> edgesIter, Collector<EdgeWithDegrees> out) {

        otherVertices.clear();

        // the shared (first) vertex of the group, taken from the first edge
        Integer groupVertex = null;

        for (Edge edge : edgesIter) {
            if (groupVertex == null) {
                groupVertex = edge.getFirstVertex();
            }
            Integer otherVertex = edge.getSecondVertex();

            // collect unique vertices; compare boxed ids by value, not by reference
            if (!otherVertex.equals(groupVertex) && !otherVertices.contains(otherVertex)) {
                otherVertices.add(otherVertex);
            }
        }

        int degree = otherVertices.size();

        // emit edges
        for (Integer otherVertex : otherVertices) {
            if (groupVertex < otherVertex) {
                outputEdge.setFirstVertex(groupVertex);
                outputEdge.setFirstDegree(degree);
                outputEdge.setSecondVertex(otherVertex);
                outputEdge.setSecondDegree(0);
            } else {
                outputEdge.setFirstVertex(otherVertex);
                outputEdge.setFirstDegree(0);
                outputEdge.setSecondVertex(groupVertex);
                outputEdge.setSecondDegree(degree);
            }
            out.collect(outputEdge);
        }
    }
}
/**
 * Builds an edge with degree annotation from two edges that have the same vertices and only one
 * degree annotation.
 *
 * <p>A degree value of 0 marks a missing annotation. The result starts as a copy of the first
 * edge; whichever degree is missing there is taken from the second edge. If the first edge has
 * both or neither degree set, it is returned as copied.
 */
@ForwardedFields("0;1")
private static class DegreeJoiner implements ReduceFunction<EdgeWithDegrees> {

    // Reused output object to avoid one allocation per reduce call.
    private final EdgeWithDegrees outEdge = new EdgeWithDegrees();

    /**
     * @param edge1 first partially annotated edge
     * @param edge2 second partially annotated edge with the same vertices
     * @return the reused edge instance carrying the combined degree annotations
     */
    @Override
    public EdgeWithDegrees reduce(EdgeWithDegrees edge1, EdgeWithDegrees edge2) throws Exception {

        // copy first edge
        outEdge.copyFrom(edge1);

        // set missing degree
        if (edge1.getFirstDegree() == 0 && edge1.getSecondDegree() != 0) {
            outEdge.setFirstDegree(edge2.getFirstDegree());
        } else if (edge1.getFirstDegree() != 0 && edge1.getSecondDegree() == 0) {
            outEdge.setSecondDegree(edge2.getSecondDegree());
        }
        return outEdge;
    }
}
/**
 * Projects an edge (pair of vertices) such that the first vertex is the vertex with the smaller degree.
 *
 * <p>If both degrees are equal, the vertex order of the input edge is kept. A single
 * {@link Edge} instance is reused for every record.
 */
private static class EdgeByDegreeProjector implements MapFunction<EdgeWithDegrees, Edge> {

    // Reused output object to avoid one allocation per record.
    private final Edge outEdge = new Edge();

    /**
     * @param inEdge the degree-annotated edge
     * @return the reused edge instance, starting at the vertex with the smaller degree
     */
    @Override
    public Edge map(EdgeWithDegrees inEdge) throws Exception {

        // copy vertices to simple edge
        outEdge.copyVerticesFromEdgeWithDegrees(inEdge);

        // flip vertices if first degree is larger than second degree.
        if (inEdge.getFirstDegree() > inEdge.getSecondDegree()) {
            outEdge.flipVertices();
        }

        // return edge
        return outEdge;
    }
}
/**
 * Projects an edge (pair of vertices) such that the id of the first is smaller than the id of the second.
 *
 * <p>The input edge is flipped in place when necessary and returned; no new object is created.
 */
private static class EdgeByIdProjector implements MapFunction<Edge, Edge> {

    /**
     * @param inEdge the edge to normalize; it may be modified by this call
     * @return {@code inEdge}, with its first vertex id not larger than its second
     */
    @Override
    public Edge map(Edge inEdge) throws Exception {

        // flip vertices if necessary
        if (inEdge.getFirstVertex() > inEdge.getSecondVertex()) {
            inEdge.flipVertices();
        }
        return inEdge;
    }
}
/**
 * Builds triads (triples of vertices) from pairs of edges that share a vertex.
 * The first vertex of a triad is the shared vertex, the second and third vertex are ordered by vertexId.
 * Assumes that input edges share the first vertex and are in ascending order of the second vertex.
 *
 * <p>A single {@link Triad} instance is reused for every emitted record.
 */
@ForwardedFields("0")
private static class TriadBuilder implements GroupReduceFunction<Edge, Triad> {

    // Both objects are reused across groups to avoid per-group allocations.
    private final List<Integer> vertices = new ArrayList<Integer>();
    private final Triad outTriad = new Triad();

    /**
     * @param edgesIter all edges of one group, sharing their first vertex
     * @param out collector receiving one triad per pair of edges in the group
     */
    @Override
    public void reduce(Iterable<Edge> edgesIter, Collector<Triad> out) throws Exception {

        final Iterator<Edge> edges = edgesIter.iterator();

        // clear vertex list
        vertices.clear();

        // read first edge
        Edge firstEdge = edges.next();
        outTriad.setFirstVertex(firstEdge.getFirstVertex());
        vertices.add(firstEdge.getSecondVertex());

        // build and emit triads
        while (edges.hasNext()) {
            Integer higherVertexId = edges.next().getSecondVertex();

            // combine vertex with all previously read vertices
            for (Integer lowerVertexId : vertices) {
                outTriad.setSecondVertex(lowerVertexId);
                outTriad.setThirdVertex(higherVertexId);
                out.collect(outTriad);
            }
            vertices.add(higherVertexId);
        }
    }
}
/**
 * Filters triads (three vertices connected by two edges) without a closing third edge.
 *
 * <p>The function itself only passes the triad through. The filtering comes from the join it
 * is used in: it is invoked only for triads that were matched with an edge.
 */
private static class TriadFilter implements JoinFunction<Triad, Edge, Triad> {

    /**
     * @param triad the triad that was matched with a closing edge
     * @param edge the matching edge; not used
     * @return {@code triad}, unchanged
     */
    @Override
    public Triad join(Triad triad, Edge edge) throws Exception {
        return triad;
    }
}
// *************************************************************************
// UTIL METHODS
// *************************************************************************
// True when program arguments were given: input is then read from edgePath and the result is
// written to outputPath instead of being printed. Set by parseParameters.
private static boolean fileOutput = false;
// Path of the edge input file (first program argument); stays null when no arguments are given.
private static String edgePath = null;
// Path the result is written to (second program argument); stays null when no arguments are given.
private static String outputPath = null;
/**
 * Parses the program arguments into the static configuration fields.
 *
 * <p>With no arguments the program runs on built-in default data and a hint is printed to
 * standard output. With exactly two arguments they are taken as edge input path and result
 * output path. Any other argument count prints a usage message to standard error.
 *
 * @param args the program arguments as passed to {@code main}
 * @return {@code true} if the program can proceed, {@code false} if the arguments are invalid
 */
private static boolean parseParameters(String[] args) {

    if (args.length > 0) {
        // parse input arguments
        fileOutput = true;
        if (args.length == 2) {
            edgePath = args[0];
            outputPath = args[1];
        } else {
            System.err.println("Usage: EnumTrianglesOpt <edge path> <result path>");
            return false;
        }
    } else {
        System.out.println("Executing Enum Triangles Opt example with built-in default data.");
        System.out.println(" Provide parameters to read input data from files.");
        System.out.println(" See the documentation for the correct format of input files.");
        System.out.println(" Usage: EnumTrianglesOpt <edge path> <result path>");
    }
    return true;
}
/**
 * Creates the edge data set the program runs on.
 *
 * <p>When file input is configured, the edge file is read as space-delimited pairs of integers
 * and each pair is converted into an {@link Edge}. Otherwise the default edge data set from
 * {@link EnumTrianglesData} is used.
 *
 * @param env the execution environment the data set is created in
 * @return the data set of input edges
 */
private static DataSet<Edge> getEdgeDataSet(ExecutionEnvironment env) {

    if (fileOutput) {
        return env.readCsvFile(edgePath)
                .fieldDelimiter(" ")
                .includeFields(true, true)
                .types(Integer.class, Integer.class)
                .map(new TupleEdgeConverter());
    } else {
        return EnumTrianglesData.getDefaultEdgeDataSet(env);
    }
}
}