// org.apache.flink.graph.spargel.VertexCentricIteration Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph.spargel;
import java.util.Iterator;
import java.util.Map;
import org.apache.flink.api.common.aggregators.Aggregator;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.common.functions.RichCoGroupFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.operators.CoGroupOperator;
import org.apache.flink.api.java.operators.CustomUnaryOperation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.EdgeDirection;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.util.Collector;
import com.google.common.base.Preconditions;
/**
* This class represents iterative graph computations, programmed in a vertex-centric perspective.
* It is a special case of Bulk Synchronous Parallel computation. The paradigm has also been
* implemented by Google's Pregel system and by Apache Giraph.
*
* Vertex centric algorithms operate on graphs, which are defined through vertices and edges. The
* algorithms send messages along the edges and update the state of vertices based on
* the old state and the incoming messages. All vertices have an initial state.
* The computation terminates once no vertex updates its state any more.
* Additionally, a maximum number of iterations (supersteps) may be specified.
*
* The computation is here represented by two functions:
*
* - The {@link VertexUpdateFunction} receives incoming messages and may update the state for
* the vertex. If a state is updated, messages are sent from this vertex. Initially, all vertices are
* considered updated.
* - The {@link MessagingFunction} takes the new vertex state and sends messages along the outgoing
* edges of the vertex. The outgoing edges may optionally have an associated value, such as a weight.
*
*
*
* Vertex-centric graph iterations are run by calling
* {@link Graph#runVertexCentricIteration(VertexUpdateFunction, MessagingFunction, int)}.
*
* @param <K> The type of the vertex key (the vertex identifier).
* @param <VV> The type of the vertex value (the state of the vertex).
* @param <Message> The type of the message sent between vertices along the edges.
* @param <EV> The type of the values that are associated with the edges.
*/
public class VertexCentricIteration
implements CustomUnaryOperation, Vertex>
{
private final VertexUpdateFunction updateFunction;
private final MessagingFunction messagingFunction;
private final DataSet> edgesWithValue;
private final int maximumNumberOfIterations;
private final TypeInformation messageType;
private DataSet> initialVertices;
private VertexCentricConfiguration configuration;
// ----------------------------------------------------------------------------------
private VertexCentricIteration(VertexUpdateFunction uf,
MessagingFunction mf,
DataSet> edgesWithValue,
int maximumNumberOfIterations)
{
Preconditions.checkNotNull(uf);
Preconditions.checkNotNull(mf);
Preconditions.checkNotNull(edgesWithValue);
Preconditions.checkArgument(maximumNumberOfIterations > 0, "The maximum number of iterations must be at least one.");
this.updateFunction = uf;
this.messagingFunction = mf;
this.edgesWithValue = edgesWithValue;
this.maximumNumberOfIterations = maximumNumberOfIterations;
this.messageType = getMessageType(mf);
}
private TypeInformation getMessageType(MessagingFunction mf) {
return TypeExtractor.createTypeInfo(MessagingFunction.class, mf.getClass(), 2, null, null);
}
// --------------------------------------------------------------------------------------------
// Custom Operator behavior
// --------------------------------------------------------------------------------------------
/**
* Sets the input data set for this operator. In the case of this operator this input data set represents
* the set of vertices with their initial state.
*
* @param inputData The input data set, which in the case of this operator represents the set of
* vertices with their initial state.
*
* @see org.apache.flink.api.java.operators.CustomUnaryOperation#setInput(org.apache.flink.api.java.DataSet)
*/
@Override
public void setInput(DataSet> inputData) {
this.initialVertices = inputData;
}
/**
* Creates the operator that represents this vertex-centric graph computation.
*
* @return The operator that represents this vertex-centric graph computation.
*/
@Override
public DataSet> createResult() {
if (this.initialVertices == null) {
throw new IllegalStateException("The input data set has not been set.");
}
// prepare some type information
TypeInformation keyType = ((TupleTypeInfo>) initialVertices.getType()).getTypeAt(0);
TypeInformation> messageTypeInfo = new TupleTypeInfo>(keyType, messageType);
// create a graph
Graph graph =
Graph.fromDataSet(initialVertices, edgesWithValue, ExecutionEnvironment.getExecutionEnvironment());
// check whether the numVertices option is set and, if so, compute the total number of vertices
// and set it within the messaging and update functions
if (this.configuration != null && this.configuration.isOptNumVertices()) {
try {
long numberOfVertices = graph.numberOfVertices();
messagingFunction.setNumberOfVertices(numberOfVertices);
updateFunction.setNumberOfVertices(numberOfVertices);
} catch (Exception e) {
e.printStackTrace();
}
}
if(this.configuration != null) {
messagingFunction.setDirection(this.configuration.getDirection());
} else {
messagingFunction.setDirection(EdgeDirection.OUT);
}
// retrieve the direction in which the updates are made and in which the messages are sent
EdgeDirection messagingDirection = messagingFunction.getDirection();
// check whether the degrees option is set and, if so, compute the in and the out degrees and
// add them to the vertex value
if(this.configuration != null && this.configuration.isOptDegrees()) {
return createResultVerticesWithDegrees(graph, messagingDirection, messageTypeInfo);
} else {
return createResultSimpleVertex(messagingDirection, messageTypeInfo);
}
}
/**
* Creates a new vertex-centric iteration operator for graphs where the edges are associated with a value (such as
* a weight or distance).
*
* @param edgesWithValue The data set containing edges.
* @param uf The function that updates the state of the vertices from the incoming messages.
* @param mf The function that turns changed vertex states into messages along the edges.
*
* @param The type of the vertex key (the vertex identifier).
* @param The type of the vertex value (the state of the vertex).
* @param The type of the message sent between vertices along the edges.
* @param The type of the values that are associated with the edges.
*
* @return An in stance of the vertex-centric graph computation operator.
*/
public static final
VertexCentricIteration withEdges(
DataSet> edgesWithValue,
VertexUpdateFunction uf,
MessagingFunction mf,
int maximumNumberOfIterations)
{
return new VertexCentricIteration(uf, mf, edgesWithValue, maximumNumberOfIterations);
}
/**
* Configures this vertex-centric iteration with the provided parameters.
*
* @param parameters the configuration parameters
*/
public void configure(VertexCentricConfiguration parameters) {
this.configuration = parameters;
}
/**
* @return the configuration parameters of this vertex-centric iteration
*/
public VertexCentricConfiguration getIterationConfiguration() {
return this.configuration;
}
// --------------------------------------------------------------------------------------------
// Wrapping UDFs
// --------------------------------------------------------------------------------------------
private static abstract class VertexUpdateUdf extends RichCoGroupFunction<
Tuple2, Vertex, Vertex>
implements ResultTypeQueryable>
{
private static final long serialVersionUID = 1L;
final VertexUpdateFunction vertexUpdateFunction;
final MessageIterator messageIter = new MessageIterator();
private transient TypeInformation> resultType;
private VertexUpdateUdf(VertexUpdateFunction vertexUpdateFunction,
TypeInformation> resultType)
{
this.vertexUpdateFunction = vertexUpdateFunction;
this.resultType = resultType;
}
@Override
public void open(Configuration parameters) throws Exception {
if (getIterationRuntimeContext().getSuperstepNumber() == 1) {
this.vertexUpdateFunction.init(getIterationRuntimeContext());
}
this.vertexUpdateFunction.preSuperstep();
}
@Override
public void close() throws Exception {
this.vertexUpdateFunction.postSuperstep();
}
@Override
public TypeInformation> getProducedType() {
return this.resultType;
}
}
@SuppressWarnings("serial")
private static final class VertexUpdateUdfSimpleVV extends VertexUpdateUdf {
private VertexUpdateUdfSimpleVV(VertexUpdateFunction vertexUpdateFunction, TypeInformation> resultType) {
super(vertexUpdateFunction, resultType);
}
@Override
public void coGroup(Iterable> messages,
Iterable> vertex,
Collector> out) throws Exception {
final Iterator> vertexIter = vertex.iterator();
if (vertexIter.hasNext()) {
Vertex vertexState = vertexIter.next();
@SuppressWarnings("unchecked")
Iterator> downcastIter = (Iterator>) (Iterator>) messages.iterator();
messageIter.setSource(downcastIter);
vertexUpdateFunction.setOutput(vertexState, out);
vertexUpdateFunction.updateVertex(vertexState, messageIter);
}
else {
final Iterator> messageIter = messages.iterator();
if (messageIter.hasNext()) {
String message = "Target vertex does not exist!.";
try {
Tuple2 next = messageIter.next();
message = "Target vertex '" + next.f0 + "' does not exist!.";
} catch (Throwable t) {}
throw new Exception(message);
} else {
throw new Exception();
}
}
}
}
@SuppressWarnings("serial")
private static final class VertexUpdateUdfVVWithDegrees extends VertexUpdateUdf, Message> {
private VertexUpdateUdfVVWithDegrees(VertexUpdateFunction, Message> vertexUpdateFunction,
TypeInformation>> resultType) {
super(vertexUpdateFunction, resultType);
}
@Override
public void coGroup(Iterable> messages, Iterable>> vertex,
Collector>> out) throws Exception {
final Iterator>> vertexIter = vertex.iterator();
if (vertexIter.hasNext()) {
Vertex> vertexWithDegrees = vertexIter.next();
@SuppressWarnings("unchecked")
Iterator> downcastIter = (Iterator>) (Iterator>) messages.iterator();
messageIter.setSource(downcastIter);
vertexUpdateFunction.setInDegree(vertexWithDegrees.f1.f1);
vertexUpdateFunction.setOutDegree(vertexWithDegrees.f1.f2);
vertexUpdateFunction.setOutputWithDegrees(vertexWithDegrees, out);
vertexUpdateFunction.updateVertexFromVertexCentricIteration(vertexWithDegrees, messageIter);
}
else {
final Iterator> messageIter = messages.iterator();
if (messageIter.hasNext()) {
String message = "Target vertex does not exist!.";
try {
Tuple2 next = messageIter.next();
message = "Target vertex '" + next.f0 + "' does not exist!.";
} catch (Throwable t) {}
throw new Exception(message);
} else {
throw new Exception();
}
}
}
}
/*
* UDF that encapsulates the message sending function for graphs where the edges have an associated value.
*/
private static abstract class MessagingUdfWithEdgeValues
extends RichCoGroupFunction, Vertex, Tuple2>
implements ResultTypeQueryable>
{
private static final long serialVersionUID = 1L;
final MessagingFunction messagingFunction;
private transient TypeInformation> resultType;
private MessagingUdfWithEdgeValues(MessagingFunction messagingFunction,
TypeInformation> resultType)
{
this.messagingFunction = messagingFunction;
this.resultType = resultType;
}
@Override
public void open(Configuration parameters) throws Exception {
if (getIterationRuntimeContext().getSuperstepNumber() == 1) {
this.messagingFunction.init(getIterationRuntimeContext());
}
this.messagingFunction.preSuperstep();
}
@Override
public void close() throws Exception {
this.messagingFunction.postSuperstep();
}
@Override
public TypeInformation> getProducedType() {
return this.resultType;
}
}
@SuppressWarnings("serial")
private static final class MessagingUdfWithEVsSimpleVV
extends MessagingUdfWithEdgeValues {
private MessagingUdfWithEVsSimpleVV(MessagingFunction messagingFunction,
TypeInformation> resultType) {
super(messagingFunction, resultType);
}
@Override
public void coGroup(Iterable> edges,
Iterable> state,
Collector> out) throws Exception {
final Iterator> stateIter = state.iterator();
if (stateIter.hasNext()) {
Vertex newVertexState = stateIter.next();
messagingFunction.set((Iterator>) edges.iterator(), out);
messagingFunction.sendMessages(newVertexState);
}
}
}
@SuppressWarnings("serial")
private static final class MessagingUdfWithEVsVVWithDegrees
extends MessagingUdfWithEdgeValues, VV, Message, EV> {
private Vertex nextVertex = new Vertex();
private MessagingUdfWithEVsVVWithDegrees(MessagingFunction messagingFunction,
TypeInformation> resultType) {
super(messagingFunction, resultType);
}
@Override
public void coGroup(Iterable> edges, Iterable>> state,
Collector> out) throws Exception {
final Iterator>> stateIter = state.iterator();
if (stateIter.hasNext()) {
Vertex> vertexWithDegrees = stateIter.next();
nextVertex.setField(vertexWithDegrees.f0, 0);
nextVertex.setField(vertexWithDegrees.f1.f0, 1);
messagingFunction.setInDegree(vertexWithDegrees.f1.f1);
messagingFunction.setOutDegree(vertexWithDegrees.f1.f2);
messagingFunction.set((Iterator>) edges.iterator(), out);
messagingFunction.sendMessages(nextVertex);
}
}
}
// --------------------------------------------------------------------------------------------
// UTIL methods
// --------------------------------------------------------------------------------------------
/**
* Method that builds the messaging function using a coGroup operator for a simple vertex(without
* degrees).
* It afterwards configures the function with a custom name and broadcast variables.
*
* @param iteration
* @param messageTypeInfo
* @param whereArg the argument for the where within the coGroup
* @param equalToArg the argument for the equalTo within the coGroup
* @return the messaging function
*/
private CoGroupOperator, ?, Tuple2> buildMessagingFunction(
DeltaIteration, Vertex> iteration,
TypeInformation> messageTypeInfo, int whereArg, int equalToArg) {
// build the messaging function (co group)
CoGroupOperator, ?, Tuple2> messages;
MessagingUdfWithEdgeValues messenger =
new MessagingUdfWithEVsSimpleVV(messagingFunction, messageTypeInfo);
messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
.equalTo(equalToArg).with(messenger);
// configure coGroup message function with name and broadcast variables
messages = messages.name("Messaging");
if(this.configuration != null) {
for (Tuple2> e : this.configuration.getMessagingBcastVars()) {
messages = messages.withBroadcastSet(e.f1, e.f0);
}
}
return messages;
}
/**
* Method that builds the messaging function using a coGroup operator for a vertex
* containing degree information.
* It afterwards configures the function with a custom name and broadcast variables.
*
* @param iteration
* @param messageTypeInfo
* @param whereArg the argument for the where within the coGroup
* @param equalToArg the argument for the equalTo within the coGroup
* @return the messaging function
*/
private CoGroupOperator, ?, Tuple2> buildMessagingFunctionVerticesWithDegrees(
DeltaIteration>, Vertex>> iteration,
TypeInformation> messageTypeInfo, int whereArg, int equalToArg) {
// build the messaging function (co group)
CoGroupOperator, ?, Tuple2> messages;
MessagingUdfWithEdgeValues, VV, Message, EV> messenger =
new MessagingUdfWithEVsVVWithDegrees(messagingFunction, messageTypeInfo);
messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
.equalTo(equalToArg).with(messenger);
// configure coGroup message function with name and broadcast variables
messages = messages.name("Messaging");
if (this.configuration != null) {
for (Tuple2> e : this.configuration.getMessagingBcastVars()) {
messages = messages.withBroadcastSet(e.f1, e.f0);
}
}
return messages;
}
/**
* Helper method which sets up an iteration with the given vertex value(either simple or with degrees)
*
* @param iteration
*/
private void setUpIteration(DeltaIteration, ?> iteration) {
// set up the iteration operator
if (this.configuration != null) {
iteration.name(this.configuration.getName("Vertex-centric iteration (" + updateFunction + " | " + messagingFunction + ")"));
iteration.parallelism(this.configuration.getParallelism());
iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());
// register all aggregators
for (Map.Entry> entry : this.configuration.getAggregators().entrySet()) {
iteration.registerAggregator(entry.getKey(), entry.getValue());
}
}
else {
// no configuration provided; set default name
iteration.name("Vertex-centric iteration (" + updateFunction + " | " + messagingFunction + ")");
}
}
/**
* Creates the operator that represents this vertex centric graph computation for a simple vertex.
*
* @param messagingDirection
* @param messageTypeInfo
* @return the operator
*/
private DataSet> createResultSimpleVertex(EdgeDirection messagingDirection,
TypeInformation> messageTypeInfo) {
DataSet> messages;
TypeInformation> vertexTypes = initialVertices.getType();
final DeltaIteration, Vertex> iteration =
initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0);
setUpIteration(iteration);
switch (messagingDirection) {
case IN:
messages = buildMessagingFunction(iteration, messageTypeInfo, 1, 0);
break;
case OUT:
messages = buildMessagingFunction(iteration, messageTypeInfo, 0, 0);
break;
case ALL:
messages = buildMessagingFunction(iteration, messageTypeInfo, 1, 0)
.union(buildMessagingFunction(iteration, messageTypeInfo, 0, 0)) ;
break;
default:
throw new IllegalArgumentException("Illegal edge direction");
}
VertexUpdateUdf updateUdf =
new VertexUpdateUdfSimpleVV(updateFunction, vertexTypes);
// build the update function (co group)
CoGroupOperator, ?, Vertex> updates =
messages.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(updateUdf);
configureUpdateFunction(updates);
return iteration.closeWith(updates, updates);
}
/**
* Creates the operator that represents this vertex centric graph computation for a vertex with in
* and out degrees added to the vertex value.
*
* @param graph
* @param messagingDirection
* @param messageTypeInfo
* @return the operator
*/
@SuppressWarnings("serial")
private DataSet> createResultVerticesWithDegrees(Graph graph, EdgeDirection messagingDirection,
TypeInformation> messageTypeInfo) {
DataSet> messages;
this.updateFunction.setOptDegrees(this.configuration.isOptDegrees());
DataSet> inDegrees = graph.inDegrees();
DataSet> outDegrees = graph.outDegrees();
DataSet> degrees = inDegrees.join(outDegrees).where(0).equalTo(0)
.with(new FlatJoinFunction, Tuple2, Tuple3>() {
@Override
public void join(Tuple2 first, Tuple2 second, Collector> out) {
out.collect(new Tuple3(first.f0, first.f1, second.f1));
}
}).withForwardedFieldsFirst("f0;f1").withForwardedFieldsSecond("f1");
DataSet>> verticesWithDegrees = initialVertices
.join(degrees).where(0).equalTo(0)
.with(new FlatJoinFunction, Tuple3, Vertex>>() {
@Override
public void join(Vertex vertex, Tuple3 degrees,
Collector>> out) throws Exception {
out.collect(new Vertex>(vertex.getId(),
new Tuple3(vertex.getValue(), degrees.f1, degrees.f2)));
}
}).withForwardedFieldsFirst("f0");
// add type info
TypeInformation>> vertexTypes = verticesWithDegrees.getType();
final DeltaIteration>, Vertex>> iteration =
verticesWithDegrees.iterateDelta(verticesWithDegrees, this.maximumNumberOfIterations, 0);
setUpIteration(iteration);
switch (messagingDirection) {
case IN:
messages = buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 1, 0);
break;
case OUT:
messages = buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 0, 0);
break;
case ALL:
messages = buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 1, 0)
.union(buildMessagingFunctionVerticesWithDegrees(iteration, messageTypeInfo, 0, 0)) ;
break;
default:
throw new IllegalArgumentException("Illegal edge direction");
}
@SuppressWarnings({ "unchecked", "rawtypes" })
VertexUpdateUdf, Message> updateUdf =
new VertexUpdateUdfVVWithDegrees(updateFunction, vertexTypes);
// build the update function (co group)
CoGroupOperator, ?, Vertex>> updates =
messages.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(updateUdf);
configureUpdateFunction(updates);
return iteration.closeWith(updates, updates).map(
new MapFunction>, Vertex>() {
public Vertex map(Vertex> vertex) {
return new Vertex(vertex.getId(), vertex.getValue().f0);
}
});
}
private void configureUpdateFunction(CoGroupOperator, ?, Vertex> updates) {
// configure coGroup update function with name and broadcast variables
updates = updates.name("Vertex State Updates");
if (this.configuration != null) {
for (Tuple2> e : this.configuration.getUpdateBcastVars()) {
updates = updates.withBroadcastSet(e.f1, e.f0);
}
}
// let the operator know that we preserve the key field
updates.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
}
}