All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.graph.spargel.VertexCentricIteration Maven / Gradle / Ivy

There is a newer version: 1.16.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.spargel;

import java.util.Iterator;
import java.util.Map;

import org.apache.flink.api.common.aggregators.Aggregator;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.common.functions.RichCoGroupFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.operators.CoGroupOperator;
import org.apache.flink.api.java.operators.CustomUnaryOperation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.EdgeDirection;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.util.Collector;

import com.google.common.base.Preconditions;

/**
 * This class represents iterative graph computations, programmed in a vertex-centric perspective.
 * It is a special case of Bulk Synchronous Parallel computation. The paradigm has also been
 * implemented by Google's Pregel system and by Apache Giraph.
 * 

* Vertex centric algorithms operate on graphs, which are defined through vertices and edges. The * algorithms send messages along the edges and update the state of vertices based on * the old state and the incoming messages. All vertices have an initial state. * The computation terminates once no vertex updates it state any more. * Additionally, a maximum number of iterations (supersteps) may be specified. *

* The computation is here represented by two functions: *

    *
  • The {@link VertexUpdateFunction} receives incoming messages and may updates the state for * the vertex. If a state is updated, messages are sent from this vertex. Initially, all vertices are * considered updated.
  • *
  • The {@link MessagingFunction} takes the new vertex state and sends messages along the outgoing * edges of the vertex. The outgoing edges may optionally have an associated value, such as a weight.
  • *
*

* * Vertex-centric graph iterations are are run by calling * {@link Graph#runVertexCentricIteration(VertexUpdateFunction, MessagingFunction, int)}. * * @param The type of the vertex key (the vertex identifier). * @param The type of the vertex value (the state of the vertex). * @param The type of the message sent between vertices along the edges. * @param The type of the values that are associated with the edges. */ public class VertexCentricIteration implements CustomUnaryOperation, Vertex> { private final VertexUpdateFunction updateFunction; private final MessagingFunction messagingFunction; private final DataSet> edgesWithValue; private final int maximumNumberOfIterations; private final TypeInformation messageType; private DataSet> initialVertices; private VertexCentricConfiguration configuration; // ---------------------------------------------------------------------------------- private VertexCentricIteration(VertexUpdateFunction uf, MessagingFunction mf, DataSet> edgesWithValue, int maximumNumberOfIterations) { Preconditions.checkNotNull(uf); Preconditions.checkNotNull(mf); Preconditions.checkNotNull(edgesWithValue); Preconditions.checkArgument(maximumNumberOfIterations > 0, "The maximum number of iterations must be at least one."); this.updateFunction = uf; this.messagingFunction = mf; this.edgesWithValue = edgesWithValue; this.maximumNumberOfIterations = maximumNumberOfIterations; this.messageType = getMessageType(mf); } private TypeInformation getMessageType(MessagingFunction mf) { return TypeExtractor.createTypeInfo(MessagingFunction.class, mf.getClass(), 2, null, null); } // -------------------------------------------------------------------------------------------- // Custom Operator behavior // -------------------------------------------------------------------------------------------- /** * Sets the input data set for this operator. In the case of this operator this input data set represents * the set of vertices with their initial state. 
* * @param inputData The input data set, which in the case of this operator represents the set of * vertices with their initial state. * * @see org.apache.flink.api.java.operators.CustomUnaryOperation#setInput(org.apache.flink.api.java.DataSet) */ @Override public void setInput(DataSet> inputData) { this.initialVertices = inputData; } /** * Creates the operator that represents this vertex-centric graph computation. * * @return The operator that represents this vertex-centric graph computation. */ @Override public DataSet> createResult() { if (this.initialVertices == null) { throw new IllegalStateException("The input data set has not been set."); } // prepare some type information TypeInformation keyType = ((TupleTypeInfo) initialVertices.getType()).getTypeAt(0); TypeInformation> messageTypeInfo = new TupleTypeInfo>(keyType, messageType); // create a graph Graph graph = Graph.fromDataSet(initialVertices, edgesWithValue, ExecutionEnvironment.getExecutionEnvironment()); // check whether the numVertices option is set and, if so, compute the total number of vertices // and set it within the messaging and update functions if (this.configuration != null && this.configuration.isOptNumVertices()) { try { long numberOfVertices = graph.numberOfVertices(); messagingFunction.setNumberOfVertices(numberOfVertices); updateFunction.setNumberOfVertices(numberOfVertices); } catch (Exception e) { e.printStackTrace(); } } if(this.configuration != null) { messagingFunction.setDirection(this.configuration.getDirection()); } else { messagingFunction.setDirection(EdgeDirection.OUT); } // retrieve the direction in which the updates are made and in which the messages are sent EdgeDirection messagingDirection = messagingFunction.getDirection(); // check whether the degrees option is set and, if so, compute the in and the out degrees and // add them to the vertex value if(this.configuration != null && this.configuration.isOptDegrees()) { return createResultVerticesWithDegrees(graph, 
messagingDirection, messageTypeInfo); } else { return createResultSimpleVertex(messagingDirection, messageTypeInfo); } } /** * Creates a new vertex-centric iteration operator for graphs where the edges are associated with a value (such as * a weight or distance). * * @param edgesWithValue The data set containing edges. * @param uf The function that updates the state of the vertices from the incoming messages. * @param mf The function that turns changed vertex states into messages along the edges. * * @param The type of the vertex key (the vertex identifier). * @param The type of the vertex value (the state of the vertex). * @param The type of the message sent between vertices along the edges. * @param The type of the values that are associated with the edges. * * @return An in stance of the vertex-centric graph computation operator. */ public static final VertexCentricIteration withEdges( DataSet> edgesWithValue, VertexUpdateFunction uf, MessagingFunction mf, int maximumNumberOfIterations) { return new VertexCentricIteration(uf, mf, edgesWithValue, maximumNumberOfIterations); } /** * Configures this vertex-centric iteration with the provided parameters. 
* * @param parameters the configuration parameters */ public void configure(VertexCentricConfiguration parameters) { this.configuration = parameters; } /** * @return the configuration parameters of this vertex-centric iteration */ public VertexCentricConfiguration getIterationConfiguration() { return this.configuration; } // -------------------------------------------------------------------------------------------- // Wrapping UDFs // -------------------------------------------------------------------------------------------- private static abstract class VertexUpdateUdf extends RichCoGroupFunction< Tuple2, Vertex, Vertex> implements ResultTypeQueryable> { private static final long serialVersionUID = 1L; final VertexUpdateFunction vertexUpdateFunction; final MessageIterator messageIter = new MessageIterator(); private transient TypeInformation> resultType; private VertexUpdateUdf(VertexUpdateFunction vertexUpdateFunction, TypeInformation> resultType) { this.vertexUpdateFunction = vertexUpdateFunction; this.resultType = resultType; } @Override public void open(Configuration parameters) throws Exception { if (getIterationRuntimeContext().getSuperstepNumber() == 1) { this.vertexUpdateFunction.init(getIterationRuntimeContext()); } this.vertexUpdateFunction.preSuperstep(); } @Override public void close() throws Exception { this.vertexUpdateFunction.postSuperstep(); } @Override public TypeInformation> getProducedType() { return this.resultType; } } @SuppressWarnings("serial") private static final class VertexUpdateUdfSimpleVV extends VertexUpdateUdf { private VertexUpdateUdfSimpleVV(VertexUpdateFunction vertexUpdateFunction, TypeInformation> resultType) { super(vertexUpdateFunction, resultType); } @Override public void coGroup(Iterable> messages, Iterable> vertex, Collector> out) throws Exception { final Iterator> vertexIter = vertex.iterator(); if (vertexIter.hasNext()) { Vertex vertexState = vertexIter.next(); @SuppressWarnings("unchecked") Iterator> downcastIter = 
(Iterator>) (Iterator) messages.iterator(); messageIter.setSource(downcastIter); vertexUpdateFunction.setOutput(vertexState, out); vertexUpdateFunction.updateVertex(vertexState, messageIter); } else { final Iterator> messageIter = messages.iterator(); if (messageIter.hasNext()) { String message = "Target vertex does not exist!."; try { Tuple2 next = messageIter.next(); message = "Target vertex '" + next.f0 + "' does not exist!."; } catch (Throwable t) {} throw new Exception(message); } else { throw new Exception(); } } } } @SuppressWarnings("serial") private static final class VertexUpdateUdfVVWithDegrees extends VertexUpdateUdf, Message> { private VertexUpdateUdfVVWithDegrees(VertexUpdateFunction, Message> vertexUpdateFunction, TypeInformation>> resultType) { super(vertexUpdateFunction, resultType); } @Override public void coGroup(Iterable> messages, Iterable>> vertex, Collector>> out) throws Exception { final Iterator>> vertexIter = vertex.iterator(); if (vertexIter.hasNext()) { Vertex> vertexWithDegrees = vertexIter.next(); @SuppressWarnings("unchecked") Iterator> downcastIter = (Iterator>) (Iterator) messages.iterator(); messageIter.setSource(downcastIter); vertexUpdateFunction.setInDegree(vertexWithDegrees.f1.f1); vertexUpdateFunction.setOutDegree(vertexWithDegrees.f1.f2); vertexUpdateFunction.setOutputWithDegrees(vertexWithDegrees, out); vertexUpdateFunction.updateVertexFromVertexCentricIteration(vertexWithDegrees, messageIter); } else { final Iterator> messageIter = messages.iterator(); if (messageIter.hasNext()) { String message = "Target vertex does not exist!."; try { Tuple2 next = messageIter.next(); message = "Target vertex '" + next.f0 + "' does not exist!."; } catch (Throwable t) {} throw new Exception(message); } else { throw new Exception(); } } } } /* * UDF that encapsulates the message sending function for graphs where the edges have an associated value. 
*/ private static abstract class MessagingUdfWithEdgeValues extends RichCoGroupFunction, Vertex, Tuple2> implements ResultTypeQueryable> { private static final long serialVersionUID = 1L; final MessagingFunction messagingFunction; private transient TypeInformation> resultType; private MessagingUdfWithEdgeValues(MessagingFunction messagingFunction, TypeInformation> resultType) { this.messagingFunction = messagingFunction; this.resultType = resultType; } @Override public void open(Configuration parameters) throws Exception { if (getIterationRuntimeContext().getSuperstepNumber() == 1) { this.messagingFunction.init(getIterationRuntimeContext()); } this.messagingFunction.preSuperstep(); } @Override public void close() throws Exception { this.messagingFunction.postSuperstep(); } @Override public TypeInformation> getProducedType() { return this.resultType; } } @SuppressWarnings("serial") private static final class MessagingUdfWithEVsSimpleVV extends MessagingUdfWithEdgeValues { private MessagingUdfWithEVsSimpleVV(MessagingFunction messagingFunction, TypeInformation> resultType) { super(messagingFunction, resultType); } @Override public void coGroup(Iterable> edges, Iterable> state, Collector> out) throws Exception { final Iterator> stateIter = state.iterator(); if (stateIter.hasNext()) { Vertex newVertexState = stateIter.next(); messagingFunction.set((Iterator) edges.iterator(), out); messagingFunction.sendMessages(newVertexState); } } } @SuppressWarnings("serial") private static final class MessagingUdfWithEVsVVWithDegrees extends MessagingUdfWithEdgeValues, VV, Message, EV> { private Vertex nextVertex = new Vertex(); private MessagingUdfWithEVsVVWithDegrees(MessagingFunction messagingFunction, TypeInformation> resultType) { super(messagingFunction, resultType); } @Override public void coGroup(Iterable> edges, Iterable>> state, Collector> out) throws Exception { final Iterator>> stateIter = state.iterator(); if (stateIter.hasNext()) { Vertex> vertexWithDegrees = 
stateIter.next(); nextVertex.setField(vertexWithDegrees.f0, 0); nextVertex.setField(vertexWithDegrees.f1.f0, 1); messagingFunction.setInDegree(vertexWithDegrees.f1.f1); messagingFunction.setOutDegree(vertexWithDegrees.f1.f2); messagingFunction.set((Iterator) edges.iterator(), out); messagingFunction.sendMessages(nextVertex); } } } // -------------------------------------------------------------------------------------------- // UTIL methods // -------------------------------------------------------------------------------------------- /** * Method that builds the messaging function using a coGroup operator for a simple vertex(without * degrees). * It afterwards configures the function with a custom name and broadcast variables. * * @param iteration * @param messageTypeInfo * @param whereArg the argument for the where within the coGroup * @param equalToArg the argument for the equalTo within the coGroup * @return the messaging function */ private CoGroupOperator> buildMessagingFunction( DeltaIteration, Vertex> iteration, TypeInformation> messageTypeInfo, int whereArg, int equalToArg) { // build the messaging function (co group) CoGroupOperator> messages; MessagingUdfWithEdgeValues messenger = new MessagingUdfWithEVsSimpleVV(messagingFunction, messageTypeInfo); messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg) .equalTo(equalToArg).with(messenger); // configure coGroup message function with name and broadcast variables messages = messages.name("Messaging"); if(this.configuration != null) { for (Tuple2> e : this.configuration.getMessagingBcastVars()) { messages = messages.withBroadcastSet(e.f1, e.f0); } } return messages; } /** * Method that builds the messaging function using a coGroup operator for a vertex * containing degree information. * It afterwards configures the function with a custom name and broadcast variables. 
* * @param iteration * @param messageTypeInfo * @param whereArg the argument for the where within the coGroup * @param equalToArg the argument for the equalTo within the coGroup * @return the messaging function */ private CoGroupOperator> buildMessagingFunctionVerticesWithDegrees( DeltaIteration>, Vertex>> iteration, TypeInformation> messageTypeInfo, int whereArg, int equalToArg) { // build the messaging function (co group) CoGroupOperator> messages; MessagingUdfWithEdgeValues, VV, Message, EV> messenger = new MessagingUdfWithEVsVVWithDegrees(messagingFunction, messageTypeInfo); messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg) .equalTo(equalToArg).with(messenger); // configure coGroup message function with name and broadcast variables messages = messages.name("Messaging"); if (this.configuration != null) { for (Tuple2> e : this.configuration.getMessagingBcastVars()) { messages = messages.withBroadcastSet(e.f1, e.f0); } } return messages; } /** * Helper method which sets up an iteration with the given vertex value(either simple or with degrees) * * @param iteration */ private void setUpIteration(DeltaIteration iteration) { // set up the iteration operator if (this.configuration != null) { iteration.name(this.configuration.getName("Vertex-centric iteration (" + updateFunction + " | " + messagingFunction + ")")); iteration.parallelism(this.configuration.getParallelism()); iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory()); // register all aggregators for (Map.Entry> entry : this.configuration.getAggregators().entrySet()) { iteration.registerAggregator(entry.getKey(), entry.getValue()); } } else { // no configuration provided; set default name iteration.name("Vertex-centric iteration (" + updateFunction + " | " + messagingFunction + ")"); } } /** * Creates the operator that represents this vertex centric graph computation for a simple vertex. 
* * @param messagingDirection * @param messageTypeInfo * @return the operator */ private DataSet> createResultSimpleVertex(EdgeDirection messagingDirection, TypeInformation> messageTypeInfo) { DataSet> messages; TypeInformation> vertexTypes = initialVertices.getType(); final DeltaIteration, Vertex> iteration = initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0); setUpIteration(iteration); switch (messagingDirection) { case IN: messages = buildMessagingFunction(iteration, messageTypeInfo, 1, 0); break; case OUT: messages = buildMessagingFunction(iteration, messageTypeInfo, 0, 0); break; case ALL: messages = buildMessagingFunction(iteration, messageTypeInfo, 1, 0) .union(buildMessagingFunction(iteration, messageTypeInfo, 0, 0)) ; break; default: throw new IllegalArgumentException("Illegal edge direction"); } VertexUpdateUdf updateUdf = new VertexUpdateUdfSimpleVV(updateFunction, vertexTypes); // build the update function (co group) CoGroupOperator> updates = messages.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(updateUdf); configureUpdateFunction(updates); return iteration.closeWith(updates, updates); } /** * Creates the operator that represents this vertex centric graph computation for a vertex with in * and out degrees added to the vertex value. 
* * @param graph * @param messagingDirection * @param messageTypeInfo * @return the operator */ @SuppressWarnings("serial") private DataSet> createResultVerticesWithDegrees(Graph graph, EdgeDirection messagingDirection, TypeInformation> messageTypeInfo) { DataSet> messages; this.updateFunction.setOptDegrees(this.configuration.isOptDegrees()); DataSet> inDegrees = graph.inDegrees(); DataSet> outDegrees = graph.outDegrees(); DataSet> degrees = inDegrees.join(outDegrees).where(0).equalTo(0) .with(new FlatJoinFunction, Tuple2, Tuple3>() { @Override public void join(Tuple2 first, Tuple2 second, Collector> out) { out.collect(new Tuple3(first.f0, first.f1, second.f1)); } }).withForwardedFieldsFirst("f0;f1").withForwardedFieldsSecond("f1"); DataSet>> verticesWithDegrees = initialVertices .join(degrees).where(0).equalTo(0) .with(new FlatJoinFunction, Tuple3, Vertex>>() { @Override public void join(Vertex vertex, Tuple3 degrees, Collector>> out) throws Exception { out.collect(new Vertex>(vertex.getId(), new Tuple3(vertex.getValue(), degrees.f1, degrees.f2))); } }).withForwardedFieldsFirst("f0"); // add type info TypeInformation>> vertexTypes = verticesWithDegrees.getType(); final DeltaIteration>, Vertex>> iteration = verticesWithDegrees.iterateDelta(verticesWithDegrees, this.maximumNumberOfIterations, 0); setUpIteration(iteration); switch (messagingDirection) { case IN: messages = buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 1, 0); break; case OUT: messages = buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 0, 0); break; case ALL: messages = buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 1, 0) .union(buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 0, 0)) ; break; default: throw new IllegalArgumentException("Illegal edge direction"); } @SuppressWarnings({ "unchecked", "rawtypes" }) VertexUpdateUdf, Message> updateUdf = new VertexUpdateUdfVVWithDegrees(updateFunction, vertexTypes); // 
build the update function (co group) CoGroupOperator>> updates = messages.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(updateUdf); configureUpdateFunction(updates); return iteration.closeWith(updates, updates).map( new MapFunction>, Vertex>() { public Vertex map(Vertex> vertex) { return new Vertex(vertex.getId(), vertex.getValue().f0); } }); } private void configureUpdateFunction(CoGroupOperator> updates) { // configure coGroup update function with name and broadcast variables updates = updates.name("Vertex State Updates"); if (this.configuration != null) { for (Tuple2> e : this.configuration.getUpdateBcastVars()) { updates = updates.withBroadcastSet(e.f1, e.f0); } } // let the operator know that we preserve the key field updates.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0"); } }