All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.examples.java.graph.EnumTrianglesOpt Maven / Gradle / Ivy

There is a newer version: 0.10.2-hadoop1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.examples.java.graph;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.examples.java.graph.util.EnumTrianglesData;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.Edge;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.EdgeWithDegrees;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.Triad;

/**
 * Triangle enumeration is a pre-processing step to find closely connected parts in graphs.
 * A triangle consists of three edges that connect three vertices with each other.
 * 
 * 

* The basic algorithm works as follows: * It groups all edges that share a common vertex and builds triads, i.e., triples of vertices * that are connected by two edges. Finally, all triads are filtered for which no third edge exists * that closes the triangle. * *

* For a group of n edges that share a common vertex, the number of built triads is quadratic ((n*(n-1))/2). * Therefore, an optimization of the algorithm is to group edges on the vertex with the smaller output degree to * reduce the number of triads. * This implementation extends the basic algorithm by computing output degrees of edge vertices and * grouping on edges on the vertex with the smaller degree. * *

* Input files are plain text files and must be formatted as follows: *

    *
  • Edges are represented as pairs for vertex IDs which are separated by space * characters. Edges are separated by new-line characters.
    * For example "1 2\n2 12\n1 12\n42 63" gives four (undirected) edges (1)-(2), (2)-(12), (1)-(12), and (42)-(63) * that include a triangle *
*
 *     (1)
 *     /  \
 *   (2)-(12)
 * 
* * Usage: EnumTriangleOpt <edge path> <result path>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}. * *

* This example shows how to use: *

    *
  • Custom Java objects which extend Tuple *
  • Group Sorting *
* */ @SuppressWarnings("serial") public class EnumTrianglesOpt { // ************************************************************************* // PROGRAM // ************************************************************************* public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read input data DataSet edges = getEdgeDataSet(env); // annotate edges with degrees DataSet edgesWithDegrees = edges .flatMap(new EdgeDuplicator()) .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new DegreeCounter()) .groupBy(EdgeWithDegrees.V1,EdgeWithDegrees.V2).reduce(new DegreeJoiner()); // project edges by degrees DataSet edgesByDegree = edgesWithDegrees .map(new EdgeByDegreeProjector()); // project edges by vertex id DataSet edgesById = edgesByDegree .map(new EdgeByIdProjector()); DataSet triangles = edgesByDegree // build triads .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder()) // filter triads .join(edgesById).where(Triad.V2,Triad.V3).equalTo(Edge.V1,Edge.V2).with(new TriadFilter()); // emit result if(fileOutput) { triangles.writeAsCsv(outputPath, "\n", ","); // execute program env.execute("Triangle Enumeration Example"); } else { triangles.print(); } } // ************************************************************************* // USER FUNCTIONS // ************************************************************************* /** Converts a Tuple2 into an Edge */ @ForwardedFields("0;1") public static class TupleEdgeConverter implements MapFunction, Edge> { private final Edge outEdge = new Edge(); @Override public Edge map(Tuple2 t) throws Exception { outEdge.copyVerticesFromTuple2(t); return outEdge; } } /** Emits for an edge the original edge and its switched version. */ private static class EdgeDuplicator implements FlatMapFunction { @Override public void flatMap(Edge edge, Collector out) throws Exception { out.collect(edge); edge.flipVertices(); out.collect(edge); } } /** * Counts the number of edges that share a common vertex. * Emits one edge for each input edge with a degree annotation for the shared vertex. * For each emitted edge, the first vertex is the vertex with the smaller id. */ private static class DegreeCounter implements GroupReduceFunction { final ArrayList otherVertices = new ArrayList(); final EdgeWithDegrees outputEdge = new EdgeWithDegrees(); @Override public void reduce(Iterable edgesIter, Collector out) { Iterator edges = edgesIter.iterator(); otherVertices.clear(); // get first edge Edge edge = edges.next(); Integer groupVertex = edge.getFirstVertex(); this.otherVertices.add(edge.getSecondVertex()); // get all other edges (assumes edges are sorted by second vertex) while (edges.hasNext()) { edge = edges.next(); Integer otherVertex = edge.getSecondVertex(); // collect unique vertices if(!otherVertices.contains(otherVertex) && otherVertex != groupVertex) { this.otherVertices.add(otherVertex); } } int degree = this.otherVertices.size(); // emit edges for(Integer otherVertex : this.otherVertices) { if(groupVertex < otherVertex) { outputEdge.setFirstVertex(groupVertex); outputEdge.setFirstDegree(degree); outputEdge.setSecondVertex(otherVertex); outputEdge.setSecondDegree(0); } else { outputEdge.setFirstVertex(otherVertex); outputEdge.setFirstDegree(0); outputEdge.setSecondVertex(groupVertex); outputEdge.setSecondDegree(degree); } out.collect(outputEdge); } } } /** * Builds an edge with degree annotation from two edges that have the same vertices and only one * degree annotation. */ @ForwardedFields("0;1") private static class DegreeJoiner implements ReduceFunction { private final EdgeWithDegrees outEdge = new EdgeWithDegrees(); @Override public EdgeWithDegrees reduce(EdgeWithDegrees edge1, EdgeWithDegrees edge2) throws Exception { // copy first edge outEdge.copyFrom(edge1); // set missing degree if(edge1.getFirstDegree() == 0 && edge1.getSecondDegree() != 0) { outEdge.setFirstDegree(edge2.getFirstDegree()); } else if (edge1.getFirstDegree() != 0 && edge1.getSecondDegree() == 0) { outEdge.setSecondDegree(edge2.getSecondDegree()); } return outEdge; } } /** Projects an edge (pair of vertices) such that the first vertex is the vertex with the smaller degree. */ private static class EdgeByDegreeProjector implements MapFunction { private final Edge outEdge = new Edge(); @Override public Edge map(EdgeWithDegrees inEdge) throws Exception { // copy vertices to simple edge outEdge.copyVerticesFromEdgeWithDegrees(inEdge); // flip vertices if first degree is larger than second degree. if(inEdge.getFirstDegree() > inEdge.getSecondDegree()) { outEdge.flipVertices(); } // return edge return outEdge; } } /** Projects an edge (pair of vertices) such that the id of the first is smaller than the id of the second. */ private static class EdgeByIdProjector implements MapFunction { @Override public Edge map(Edge inEdge) throws Exception { // flip vertices if necessary if(inEdge.getFirstVertex() > inEdge.getSecondVertex()) { inEdge.flipVertices(); } return inEdge; } } /** * Builds triads (triples of vertices) from pairs of edges that share a vertex. * The first vertex of a triad is the shared vertex, the second and third vertex are ordered by vertexId. * Assumes that input edges share the first vertex and are in ascending order of the second vertex. */ @ForwardedFields("0") private static class TriadBuilder implements GroupReduceFunction { private final List vertices = new ArrayList(); private final Triad outTriad = new Triad(); @Override public void reduce(Iterable edgesIter, Collector out) throws Exception { final Iterator edges = edgesIter.iterator(); // clear vertex list vertices.clear(); // read first edge Edge firstEdge = edges.next(); outTriad.setFirstVertex(firstEdge.getFirstVertex()); vertices.add(firstEdge.getSecondVertex()); // build and emit triads while (edges.hasNext()) { Integer higherVertexId = edges.next().getSecondVertex(); // combine vertex with all previously read vertices for(Integer lowerVertexId : vertices) { outTriad.setSecondVertex(lowerVertexId); outTriad.setThirdVertex(higherVertexId); out.collect(outTriad); } vertices.add(higherVertexId); } } } /** Filters triads (three vertices connected by two edges) without a closing third edge. */ private static class TriadFilter implements JoinFunction { @Override public Triad join(Triad triad, Edge edge) throws Exception { return triad; } } // ************************************************************************* // UTIL METHODS // ************************************************************************* private static boolean fileOutput = false; private static String edgePath = null; private static String outputPath = null; private static boolean parseParameters(String[] args) { if(args.length > 0) { // parse input arguments fileOutput = true; if(args.length == 2) { edgePath = args[0]; outputPath = args[1]; } else { System.err.println("Usage: EnumTriangleBasic "); return false; } } else { System.out.println("Executing Enum Triangles Opt example with built-in default data."); System.out.println(" Provide parameters to read input data from files."); System.out.println(" See the documentation for the correct format of input files."); System.out.println(" Usage: EnumTriangleOpt "); } return true; } private static DataSet getEdgeDataSet(ExecutionEnvironment env) { if(fileOutput) { return env.readCsvFile(edgePath) .fieldDelimiter(" ") .includeFields(true, true) .types(Integer.class, Integer.class) .map(new TupleEdgeConverter()); } else { return EnumTrianglesData.getDefaultEdgeDataSet(env); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy