
// org.apache.flink.graph.Graph Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.graph.asm.translate.TranslateEdgeValues;
import org.apache.flink.graph.asm.translate.TranslateFunction;
import org.apache.flink.graph.asm.translate.TranslateGraphIds;
import org.apache.flink.graph.asm.translate.TranslateVertexValues;
import org.apache.flink.graph.gsa.ApplyFunction;
import org.apache.flink.graph.gsa.GSAConfiguration;
import org.apache.flink.graph.gsa.GatherSumApplyIteration;
import org.apache.flink.graph.gsa.SumFunction;
import org.apache.flink.graph.pregel.ComputeFunction;
import org.apache.flink.graph.pregel.MessageCombiner;
import org.apache.flink.graph.pregel.VertexCentricConfiguration;
import org.apache.flink.graph.pregel.VertexCentricIteration;
import org.apache.flink.graph.spargel.ScatterFunction;
import org.apache.flink.graph.spargel.ScatterGatherConfiguration;
import org.apache.flink.graph.spargel.ScatterGatherIteration;
import org.apache.flink.graph.utils.EdgeToTuple3Map;
import org.apache.flink.graph.utils.Tuple2ToVertexMap;
import org.apache.flink.graph.utils.Tuple3ToEdgeMap;
import org.apache.flink.graph.utils.VertexToTuple2Map;
import org.apache.flink.graph.validation.GraphValidator;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;
import org.apache.flink.util.Collector;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* Represents a Graph consisting of {@link Edge edges} and {@link Vertex
* vertices}.
*
*
* @see org.apache.flink.graph.Edge
* @see org.apache.flink.graph.Vertex
*
* @param <K> the key type for edge and vertex identifiers
* @param <VV> the value type for vertices
* @param <EV> the value type for edges
*/
@SuppressWarnings("serial")
public class Graph {
private final ExecutionEnvironment context;
private final DataSet> vertices;
private final DataSet> edges;
/**
 * Creates a graph from two DataSets: vertices and edges.
 *
 * <p>The constructor only stores the given references; it does not inspect or
 * transform the DataSets.
 *
 * @param vertices a DataSet of vertices.
 * @param edges a DataSet of edges.
 * @param context the flink execution environment.
 */
private Graph(DataSet<Vertex<K, VV>> vertices, DataSet<Edge<K, EV>> edges, ExecutionEnvironment context) {
	this.vertices = vertices;
	this.edges = edges;
	this.context = context;
}
/**
* Creates a graph from a Collection of vertices and a Collection of edges.
*
* @param vertices a Collection of vertices.
* @param edges a Collection of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromCollection(Collection> vertices,
Collection> edges, ExecutionEnvironment context) {
return fromDataSet(context.fromCollection(vertices),
context.fromCollection(edges), context);
}
/**
* Creates a graph from a Collection of edges.
* Vertices are created automatically and their values are set to
* NullValue.
*
* @param edges a Collection of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromCollection(Collection> edges,
ExecutionEnvironment context) {
return fromDataSet(context.fromCollection(edges), context);
}
/**
* Creates a graph from a Collection of edges.
* Vertices are created automatically and their values are set
* by applying the provided map function to the vertex IDs.
*
* @param edges a Collection of edges.
* @param vertexValueInitializer a map function that initializes the vertex values.
* It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromCollection(Collection> edges,
final MapFunction vertexValueInitializer, ExecutionEnvironment context) {
return fromDataSet(context.fromCollection(edges), vertexValueInitializer, context);
}
/**
* Creates a graph from a DataSet of vertices and a DataSet of edges.
*
* @param vertices a DataSet of vertices.
* @param edges a DataSet of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromDataSet(DataSet> vertices,
DataSet> edges, ExecutionEnvironment context) {
return new Graph(vertices, edges, context);
}
/**
* Creates a graph from a DataSet of edges.
* Vertices are created automatically and their values are set to
* NullValue.
*
* @param edges a DataSet of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromDataSet(
DataSet> edges, ExecutionEnvironment context) {
DataSet> vertices = edges.flatMap(new EmitSrcAndTarget()).distinct();
return new Graph(vertices, edges, context);
}
private static final class EmitSrcAndTarget implements FlatMapFunction<
Edge, Vertex> {
public void flatMap(Edge edge, Collector> out) {
out.collect(new Vertex(edge.f0, NullValue.getInstance()));
out.collect(new Vertex(edge.f1, NullValue.getInstance()));
}
}
/**
* Creates a graph from a DataSet of edges.
* Vertices are created automatically and their values are set
* by applying the provided map function to the vertex IDs.
*
* @param edges a DataSet of edges.
* @param vertexValueInitializer the mapper function that initializes the vertex values.
* It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromDataSet(DataSet> edges,
final MapFunction vertexValueInitializer, ExecutionEnvironment context) {
TypeInformation keyType = ((TupleTypeInfo>) edges.getType()).getTypeAt(0);
TypeInformation valueType = TypeExtractor.createTypeInfo(
MapFunction.class, vertexValueInitializer.getClass(), 1, null, null);
@SuppressWarnings({ "unchecked", "rawtypes" })
TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
Vertex.class, keyType, valueType);
DataSet> vertices = edges
.flatMap(new EmitSrcAndTargetAsTuple1()).distinct()
.map(new MapFunction, Vertex>() {
public Vertex map(Tuple1 value) throws Exception {
return new Vertex(value.f0, vertexValueInitializer.map(value.f0));
}
}).returns(returnType).withForwardedFields("f0");
return new Graph(vertices, edges, context);
}
private static final class EmitSrcAndTargetAsTuple1 implements FlatMapFunction<
Edge, Tuple1> {
public void flatMap(Edge edge, Collector> out) {
out.collect(new Tuple1(edge.f0));
out.collect(new Tuple1(edge.f1));
}
}
/**
* Creates a graph from a DataSet of Tuple2 objects for vertices and
* Tuple3 objects for edges.
*
* The first field of the Tuple2 vertex object will become the vertex ID
* and the second field will become the vertex value.
* The first field of the Tuple3 object for edges will become the source ID,
* the second field will become the target ID, and the third field will become
* the edge value.
*
* @param vertices a DataSet of Tuple2 representing the vertices.
* @param edges a DataSet of Tuple3 representing the edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTupleDataSet(DataSet> vertices,
DataSet> edges, ExecutionEnvironment context) {
DataSet> vertexDataSet = vertices.map(new Tuple2ToVertexMap());
DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap());
return fromDataSet(vertexDataSet, edgeDataSet, context);
}
/**
* Creates a graph from a DataSet of Tuple3 objects for edges.
*
* The first field of the Tuple3 object will become the source ID,
* the second field will become the target ID, and the third field will become
* the edge value.
*
* Vertices are created automatically and their values are set to NullValue.
*
* @param edges a DataSet of Tuple3 representing the edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTupleDataSet(DataSet> edges,
ExecutionEnvironment context) {
DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap());
return fromDataSet(edgeDataSet, context);
}
/**
* Creates a graph from a DataSet of Tuple3 objects for edges.
*
* Each Tuple3 will become one Edge, where the source ID will be the first field of the Tuple2,
* the target ID will be the second field of the Tuple2
* and the Edge value will be the third field of the Tuple3.
*
* Vertices are created automatically and their values are initialized
* by applying the provided vertexValueInitializer map function to the vertex IDs.
*
* @param edges a DataSet of Tuple3.
* @param vertexValueInitializer the mapper function that initializes the vertex values.
* It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTupleDataSet(DataSet> edges,
final MapFunction vertexValueInitializer, ExecutionEnvironment context) {
DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap());
return fromDataSet(edgeDataSet, vertexValueInitializer, context);
}
/**
* Creates a graph from a DataSet of Tuple2 objects for edges.
* Each Tuple2 will become one Edge, where the source ID will be the first field of the Tuple2
* and the target ID will be the second field of the Tuple2.
*
* Edge value types and Vertex values types will be set to NullValue.
*
* @param edges a DataSet of Tuple2.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTuple2DataSet(DataSet> edges,
ExecutionEnvironment context) {
DataSet> edgeDataSet = edges.map(
new MapFunction, Edge>() {
public Edge map(Tuple2 input) {
return new Edge(input.f0, input.f1, NullValue.getInstance());
}
}).withForwardedFields("f0; f1");
return fromDataSet(edgeDataSet, context);
}
/**
* Creates a graph from a DataSet of Tuple2 objects for edges.
* Each Tuple2 will become one Edge, where the source ID will be the first field of the Tuple2
* and the target ID will be the second field of the Tuple2.
*
* Edge value types will be set to NullValue.
* Vertex values can be initialized by applying a user-defined map function on the vertex IDs.
*
* @param edges a DataSet of Tuple2, where the first field corresponds to the source ID
* and the second field corresponds to the target ID.
* @param vertexValueInitializer the mapper function that initializes the vertex values.
* It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTuple2DataSet(DataSet> edges,
final MapFunction vertexValueInitializer, ExecutionEnvironment context) {
DataSet> edgeDataSet = edges.map(
new MapFunction, Edge>() {
public Edge map(Tuple2 input) {
return new Edge(input.f0, input.f1, NullValue.getInstance());
}
}).withForwardedFields("f0; f1");
return fromDataSet(edgeDataSet, vertexValueInitializer, context);
}
/**
 * Starts building a Graph from two CSV files: one holding the vertices and one holding the edges.
 *
 * @param verticesPath path to a CSV file with the Vertex data.
 * @param edgesPath path to a CSV file with the Edge data
 * @param context the Flink execution environment.
 * @return An instance of {@link org.apache.flink.graph.GraphCsvReader},
 * on which calling methods to specify types of the Vertex ID, Vertex value and Edge value returns a Graph.
 *
 * @see org.apache.flink.graph.GraphCsvReader#types(Class, Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#vertexTypes(Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#edgeTypes(Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#keyType(Class)
 */
public static GraphCsvReader fromCsvReader(String verticesPath, String edgesPath, ExecutionEnvironment context) {
	final GraphCsvReader reader = new GraphCsvReader(verticesPath, edgesPath, context);
	return reader;
}
/**
 * Starts building a Graph from a single CSV file of edges. Vertices will be created automatically.
 *
 * @param edgesPath a path to a CSV file with the Edges data
 * @param context the execution environment.
 * @return An instance of {@link org.apache.flink.graph.GraphCsvReader},
 * on which calling methods to specify types of the Vertex ID, Vertex value and Edge value returns a Graph.
 *
 * @see org.apache.flink.graph.GraphCsvReader#types(Class, Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#vertexTypes(Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#edgeTypes(Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#keyType(Class)
 */
public static GraphCsvReader fromCsvReader(String edgesPath, ExecutionEnvironment context) {
	final GraphCsvReader reader = new GraphCsvReader(edgesPath, context);
	return reader;
}
/**
 * Creates a graph from a CSV file of edges. Vertices will be created automatically and
 * Vertex values can be initialized using a user-defined mapper.
 *
 * @param edgesPath a path to a CSV file with the Edge data
 * @param vertexValueInitializer the mapper function that initializes the vertex values.
 * It is applied to each vertex ID to produce the initial vertex value.
 * @param context the execution environment.
 * @param <K> the key type for edge and vertex identifiers
 * @param <VV> the value type for vertices
 * @return An instance of {@link org.apache.flink.graph.GraphCsvReader},
 * on which calling methods to specify types of the Vertex ID, Vertex Value and Edge value returns a Graph.
 *
 * @see org.apache.flink.graph.GraphCsvReader#types(Class, Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#vertexTypes(Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#edgeTypes(Class, Class)
 * @see org.apache.flink.graph.GraphCsvReader#keyType(Class)
 */
public static <K, VV> GraphCsvReader fromCsvReader(String edgesPath,
		final MapFunction<K, VV> vertexValueInitializer, ExecutionEnvironment context) {
	return new GraphCsvReader(edgesPath, vertexValueInitializer, context);
}
/**
 * Returns the execution environment this graph was created with.
 *
 * @return the flink execution environment.
 */
public ExecutionEnvironment getContext() {
	return context;
}
/**
 * Function that checks whether a Graph is a valid Graph,
 * as defined by the given {@link GraphValidator}.
 *
 * @param validator the validator to run against this graph.
 * @return true if the Graph is valid.
 * @throws Exception propagated from {@link GraphValidator#validate(Graph)}.
 */
public Boolean validate(GraphValidator<K, VV, EV> validator) throws Exception {
	return validator.validate(this);
}
/**
* @return the vertex DataSet.
*/
public DataSet> getVertices() {
return vertices;
}
/**
* @return the edge DataSet.
*/
public DataSet> getEdges() {
return edges;
}
/**
* @return the vertex DataSet as Tuple2.
*/
public DataSet> getVerticesAsTuple2() {
return vertices.map(new VertexToTuple2Map());
}
/**
* @return the edge DataSet as Tuple3.
*/
public DataSet> getEdgesAsTuple3() {
return edges.map(new EdgeToTuple3Map());
}
/**
* This method allows access to the graph's edge values along with its source and target vertex values.
*
* @return a triplet DataSet consisting of (srcVertexId, trgVertexId, srcVertexValue, trgVertexValue, edgeValue)
*/
public DataSet> getTriplets() {
return this.getVertices().join(this.getEdges()).where(0).equalTo(0)
.with(new ProjectEdgeWithSrcValue())
.join(this.getVertices()).where(1).equalTo(0)
.with(new ProjectEdgeWithVertexValues());
}
@ForwardedFieldsFirst("f1->f2")
@ForwardedFieldsSecond("f0; f1; f2->f3")
private static final class ProjectEdgeWithSrcValue implements
FlatJoinFunction, Edge, Tuple4> {
@Override
public void join(Vertex vertex, Edge edge, Collector> collector)
throws Exception {
collector.collect(new Tuple4(edge.getSource(), edge.getTarget(), vertex.getValue(),
edge.getValue()));
}
}
@ForwardedFieldsFirst("f0; f1; f2; f3->f4")
@ForwardedFieldsSecond("f1->f3")
private static final class ProjectEdgeWithVertexValues implements
FlatJoinFunction, Vertex, Triplet> {
@Override
public void join(Tuple4 tripletWithSrcValSet,
Vertex vertex, Collector> collector) throws Exception {
collector.collect(new Triplet(tripletWithSrcValSet.f0, tripletWithSrcValSet.f1,
tripletWithSrcValSet.f2, vertex.getValue(), tripletWithSrcValSet.f3));
}
}
/**
* Apply a function to the attribute of each vertex in the graph.
*
* @param mapper the map function to apply.
* @return a new graph
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public Graph mapVertices(final MapFunction, NV> mapper) {
TypeInformation keyType = ((TupleTypeInfo>) vertices.getType()).getTypeAt(0);
TypeInformation valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, null, null);
TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
Vertex.class, keyType, valueType);
return mapVertices(mapper, returnType);
}
/**
* Apply a function to the attribute of each vertex in the graph.
*
* @param mapper the map function to apply.
* @param returnType the explicit return type.
* @return a new graph
*/
public Graph mapVertices(final MapFunction, NV> mapper, TypeInformation> returnType) {
DataSet> mappedVertices = vertices.map(
new MapFunction, Vertex>() {
public Vertex map(Vertex value) throws Exception {
return new Vertex(value.f0, mapper.map(value));
}
})
.returns(returnType)
.withForwardedFields("f0");
return new Graph(mappedVertices, this.edges, this.context);
}
/**
* Apply a function to the attribute of each edge in the graph.
*
* @param mapper the map function to apply.
* @return a new graph
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public Graph mapEdges(final MapFunction, NV> mapper) {
TypeInformation keyType = ((TupleTypeInfo>) edges.getType()).getTypeAt(0);
TypeInformation valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, null, null);
TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
Edge.class, keyType, keyType, valueType);
return mapEdges(mapper, returnType);
}
/**
 * Translate {@link Vertex} and {@link Edge} IDs using the given {@link TranslateFunction}.
 *
 * @param translator implements conversion from {@code K} to {@code NEW}
 * @param <NEW> new ID type
 * @return graph with translated vertex and edge IDs
 * @throws Exception propagated from running the {@link TranslateGraphIds} algorithm
 */
public <NEW> Graph<NEW, VV, EV> translateGraphIds(TranslateFunction<K, NEW> translator) throws Exception {
	return run(new TranslateGraphIds<K, NEW, VV, EV>(translator));
}
/**
 * Translate {@link Vertex} values using the given {@link TranslateFunction}.
 *
 * @param translator implements conversion from {@code VV} to {@code NEW}
 * @param <NEW> new vertex value type
 * @return graph with translated vertex values
 * @throws Exception propagated from running the {@link TranslateVertexValues} algorithm
 */
public <NEW> Graph<K, NEW, EV> translateVertexValues(TranslateFunction<VV, NEW> translator) throws Exception {
	return run(new TranslateVertexValues<K, VV, NEW, EV>(translator));
}
/**
 * Translate {@link Edge} values using the given {@link TranslateFunction}.
 *
 * @param translator implements conversion from {@code EV} to {@code NEW}
 * @param <NEW> new edge value type
 * @return graph with translated edge values
 * @throws Exception propagated from running the {@link TranslateEdgeValues} algorithm
 */
public <NEW> Graph<K, VV, NEW> translateEdgeValues(TranslateFunction<EV, NEW> translator) throws Exception {
	return run(new TranslateEdgeValues<K, VV, EV, NEW>(translator));
}
/**
* Apply a function to the attribute of each edge in the graph.
*
* @param mapper the map function to apply.
* @param returnType the explicit return type.
* @return a new graph
*/
public Graph mapEdges(final MapFunction, NV> mapper, TypeInformation> returnType) {
DataSet> mappedEdges = edges.map(
new MapFunction, Edge>() {
public Edge map(Edge value) throws Exception {
return new Edge(value.f0, value.f1, mapper
.map(value));
}
})
.returns(returnType)
.withForwardedFields("f0; f1");
return new Graph(this.vertices, mappedEdges, this.context);
}
/**
* Joins the vertex DataSet of this graph with an input Tuple2 DataSet and applies
* a user-defined transformation on the values of the matched records.
* The vertex ID and the first field of the Tuple2 DataSet are used as the join keys.
*
* @param inputDataSet the Tuple2 DataSet to join with.
* The first field of the Tuple2 is used as the join key and the second field is passed
* as a parameter to the transformation function.
* @param vertexJoinFunction the transformation function to apply.
* The first parameter is the current vertex value and the second parameter is the value
* of the matched Tuple2 from the input DataSet.
* @return a new Graph, where the vertex values have been updated according to the
* result of the vertexJoinFunction.
*
* @param the type of the second field of the input Tuple2 DataSet.
*/
public Graph joinWithVertices(DataSet> inputDataSet,
final VertexJoinFunction vertexJoinFunction) {
DataSet> resultedVertices = this.getVertices()
.coGroup(inputDataSet).where(0).equalTo(0)
.with(new ApplyCoGroupToVertexValues(vertexJoinFunction));
return new Graph(resultedVertices, this.edges, this.context);
}
private static final class ApplyCoGroupToVertexValues
implements CoGroupFunction, Tuple2, Vertex> {
private VertexJoinFunction vertexJoinFunction;
public ApplyCoGroupToVertexValues(VertexJoinFunction mapper) {
this.vertexJoinFunction = mapper;
}
@Override
public void coGroup(Iterable> vertices,
Iterable> input, Collector> collector) throws Exception {
final Iterator> vertexIterator = vertices.iterator();
final Iterator> inputIterator = input.iterator();
if (vertexIterator.hasNext()) {
if (inputIterator.hasNext()) {
final Tuple2 inputNext = inputIterator.next();
collector.collect(new Vertex(inputNext.f0, vertexJoinFunction
.vertexJoin(vertexIterator.next().f1, inputNext.f1)));
} else {
collector.collect(vertexIterator.next());
}
}
}
}
/**
* Joins the edge DataSet with an input DataSet on the composite key of both
* source and target IDs and applies a user-defined transformation on the values
* of the matched records. The first two fields of the input DataSet are used as join keys.
*
* @param inputDataSet the DataSet to join with.
* The first two fields of the Tuple3 are used as the composite join key
* and the third field is passed as a parameter to the transformation function.
* @param edgeJoinFunction the transformation function to apply.
* The first parameter is the current edge value and the second parameter is the value
* of the matched Tuple3 from the input DataSet.
* @param the type of the third field of the input Tuple3 DataSet.
* @return a new Graph, where the edge values have been updated according to the
* result of the edgeJoinFunction.
*/
public Graph joinWithEdges(DataSet> inputDataSet,
final EdgeJoinFunction edgeJoinFunction) {
DataSet> resultedEdges = this.getEdges()
.coGroup(inputDataSet).where(0, 1).equalTo(0, 1)
.with(new ApplyCoGroupToEdgeValues(edgeJoinFunction));
return new Graph(this.vertices, resultedEdges, this.context);
}
private static final class ApplyCoGroupToEdgeValues
implements CoGroupFunction, Tuple3, Edge> {
private EdgeJoinFunction edgeJoinFunction;
public ApplyCoGroupToEdgeValues(EdgeJoinFunction mapper) {
this.edgeJoinFunction = mapper;
}
@Override
public void coGroup(Iterable> edges, Iterable> input,
Collector> collector) throws Exception {
final Iterator> edgesIterator = edges.iterator();
final Iterator> inputIterator = input.iterator();
if (edgesIterator.hasNext()) {
if (inputIterator.hasNext()) {
final Tuple3 inputNext = inputIterator.next();
collector.collect(new Edge(inputNext.f0,
inputNext.f1, edgeJoinFunction.edgeJoin(
edgesIterator.next().f2, inputNext.f2)));
} else {
collector.collect(edgesIterator.next());
}
}
}
}
/**
* Joins the edge DataSet with an input Tuple2 DataSet and applies a user-defined transformation
* on the values of the matched records.
* The source ID of the edges input and the first field of the input DataSet are used as join keys.
*
* @param inputDataSet the DataSet to join with.
* The first field of the Tuple2 is used as the join key
* and the second field is passed as a parameter to the transformation function.
* @param edgeJoinFunction the transformation function to apply.
* The first parameter is the current edge value and the second parameter is the value
* of the matched Tuple2 from the input DataSet.
* @param the type of the second field of the input Tuple2 DataSet.
* @return a new Graph, where the edge values have been updated according to the
* result of the edgeJoinFunction.
*/
public Graph joinWithEdgesOnSource(DataSet> inputDataSet,
final EdgeJoinFunction edgeJoinFunction) {
DataSet> resultedEdges = this.getEdges()
.coGroup(inputDataSet).where(0).equalTo(0)
.with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(edgeJoinFunction));
return new Graph(this.vertices, resultedEdges, this.context);
}
private static final class ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget
implements CoGroupFunction, Tuple2, Edge> {
private EdgeJoinFunction edgeJoinFunction;
public ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(EdgeJoinFunction mapper) {
this.edgeJoinFunction = mapper;
}
@Override
public void coGroup(Iterable> edges,
Iterable> input, Collector> collector) throws Exception {
final Iterator> edgesIterator = edges.iterator();
final Iterator> inputIterator = input.iterator();
if (inputIterator.hasNext()) {
final Tuple2 inputNext = inputIterator.next();
while (edgesIterator.hasNext()) {
Edge edgesNext = edgesIterator.next();
collector.collect(new Edge(edgesNext.f0,
edgesNext.f1, edgeJoinFunction.edgeJoin(edgesNext.f2, inputNext.f1)));
}
} else {
while (edgesIterator.hasNext()) {
collector.collect(edgesIterator.next());
}
}
}
}
/**
* Joins the edge DataSet with an input Tuple2 DataSet and applies a user-defined transformation
* on the values of the matched records.
* The target ID of the edges input and the first field of the input DataSet are used as join keys.
*
* @param inputDataSet the DataSet to join with.
* The first field of the Tuple2 is used as the join key
* and the second field is passed as a parameter to the transformation function.
* @param edgeJoinFunction the transformation function to apply.
* The first parameter is the current edge value and the second parameter is the value
* of the matched Tuple2 from the input DataSet.
* @param the type of the second field of the input Tuple2 DataSet.
* @return a new Graph, where the edge values have been updated according to the
* result of the edgeJoinFunction.
*/
public Graph joinWithEdgesOnTarget(DataSet> inputDataSet,
final EdgeJoinFunction