All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.graph.Graph Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph;

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.graph.asm.translate.TranslateEdgeValues;
import org.apache.flink.graph.asm.translate.TranslateFunction;
import org.apache.flink.graph.asm.translate.TranslateGraphIds;
import org.apache.flink.graph.asm.translate.TranslateVertexValues;
import org.apache.flink.graph.gsa.ApplyFunction;
import org.apache.flink.graph.gsa.GSAConfiguration;
import org.apache.flink.graph.gsa.GatherSumApplyIteration;
import org.apache.flink.graph.gsa.SumFunction;
import org.apache.flink.graph.pregel.ComputeFunction;
import org.apache.flink.graph.pregel.MessageCombiner;
import org.apache.flink.graph.pregel.VertexCentricConfiguration;
import org.apache.flink.graph.pregel.VertexCentricIteration;
import org.apache.flink.graph.spargel.ScatterFunction;
import org.apache.flink.graph.spargel.ScatterGatherConfiguration;
import org.apache.flink.graph.spargel.ScatterGatherIteration;
import org.apache.flink.graph.utils.EdgeToTuple3Map;
import org.apache.flink.graph.utils.Tuple2ToVertexMap;
import org.apache.flink.graph.utils.Tuple3ToEdgeMap;
import org.apache.flink.graph.utils.VertexToTuple2Map;
import org.apache.flink.graph.validation.GraphValidator;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * Represents a Graph consisting of {@link Edge edges} and {@link Vertex
 * vertices}.
 * 
 * 
 * @see org.apache.flink.graph.Edge
 * @see org.apache.flink.graph.Vertex
 * 
 * @param <K> the key type for edge and vertex identifiers
 * @param <VV> the value type for vertices
 * @param <EV> the value type for edges
 */
@SuppressWarnings("serial")
public class Graph {

	private final ExecutionEnvironment context;
	private final DataSet> vertices;
	private final DataSet> edges;

	/**
	 * Creates a graph from two DataSets: vertices and edges.
	 *
	 * @param vertices a DataSet of vertices.
	 * @param edges a DataSet of edges.
	 * @param context the flink execution environment.
	 */
	private Graph(DataSet<Vertex<K, VV>> vertices, DataSet<Edge<K, EV>> edges, ExecutionEnvironment context) {
		this.vertices = vertices;
		this.edges = edges;
		this.context = context;
	}

	/**
	 * Creates a graph from a Collection of vertices and a Collection of edges.
	 * 
	 * @param vertices a Collection of vertices.
	 * @param edges a Collection of edges.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static  Graph fromCollection(Collection> vertices,
			Collection> edges, ExecutionEnvironment context) {

		return fromDataSet(context.fromCollection(vertices),
				context.fromCollection(edges), context);
	}

	/**
	 * Creates a graph from a Collection of edges.
	 * Vertices are created automatically and their values are set to
	 * NullValue.
	 * 
	 * @param edges a Collection of edges.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static  Graph fromCollection(Collection> edges,
			ExecutionEnvironment context) {

		return fromDataSet(context.fromCollection(edges), context);
	}

	/**
	 * Creates a graph from a Collection of edges.
	 * Vertices are created automatically and their values are set 
	 * by applying the provided map function to the vertex IDs.
	 * 
	 * @param edges a Collection of edges.
	 * @param vertexValueInitializer a map function that initializes the vertex values.
	 * It allows to apply a map transformation on the vertex ID to produce an initial vertex value. 
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static  Graph fromCollection(Collection> edges,
			final MapFunction vertexValueInitializer, ExecutionEnvironment context) {

		return fromDataSet(context.fromCollection(edges), vertexValueInitializer, context);
	}

	/**
	 * Creates a graph from a DataSet of vertices and a DataSet of edges.
	 * 
	 * @param vertices a DataSet of vertices.
	 * @param edges a DataSet of edges.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static  Graph fromDataSet(DataSet> vertices,
			DataSet> edges, ExecutionEnvironment context) {

		return new Graph(vertices, edges, context);
	}

	/**
	 * Creates a graph from a DataSet of edges.
	 * Vertices are created automatically and their values are set to
	 * NullValue.
	 * 
	 * @param edges a DataSet of edges.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static  Graph fromDataSet(
			DataSet> edges, ExecutionEnvironment context) {

		DataSet> vertices = edges.flatMap(new EmitSrcAndTarget()).distinct();

		return new Graph(vertices, edges, context);
	}

	private static final class EmitSrcAndTarget implements FlatMapFunction<
			Edge, Vertex> {

		public void flatMap(Edge edge, Collector> out) {
			out.collect(new Vertex(edge.f0, NullValue.getInstance()));
			out.collect(new Vertex(edge.f1, NullValue.getInstance()));
		}
	}

	/**
	 * Creates a graph from a DataSet of edges.
	 * Vertices are created automatically and their values are set
	 * by applying the provided map function to the vertex IDs.
	 * 
	 * @param edges a DataSet of edges.
	 * @param vertexValueInitializer the mapper function that initializes the vertex values.
	 * It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static  Graph fromDataSet(DataSet> edges,
			final MapFunction vertexValueInitializer, ExecutionEnvironment context) {

		TypeInformation keyType = ((TupleTypeInfo) edges.getType()).getTypeAt(0);

		TypeInformation valueType = TypeExtractor.createTypeInfo(
				MapFunction.class, vertexValueInitializer.getClass(), 1, null, null);

		@SuppressWarnings({ "unchecked", "rawtypes" })
		TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
				Vertex.class, keyType, valueType);

		DataSet> vertices = edges
				.flatMap(new EmitSrcAndTargetAsTuple1()).distinct()
				.map(new MapFunction, Vertex>() {
					public Vertex map(Tuple1 value) throws Exception {
						return new Vertex(value.f0, vertexValueInitializer.map(value.f0));
					}
				}).returns(returnType).withForwardedFields("f0");

		return new Graph(vertices, edges, context);
	}

	private static final class EmitSrcAndTargetAsTuple1 implements FlatMapFunction<
		Edge, Tuple1> {

		public void flatMap(Edge edge, Collector> out) {
			out.collect(new Tuple1(edge.f0));
			out.collect(new Tuple1(edge.f1));
		}
	}

	/**
	 * Creates a graph from a DataSet of Tuple2 objects for vertices and
	 * Tuple3 objects for edges.
	 *
	 * <p>The first field of the Tuple2 vertex object will become the vertex ID
	 * and the second field will become the vertex value.
	 * The first field of the Tuple3 object for edges will become the source ID,
	 * the second field will become the target ID, and the third field will become
	 * the edge value.
	 *
	 * @param vertices a DataSet of Tuple2 representing the vertices.
	 * @param edges a DataSet of Tuple3 representing the edges.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static <K, VV, EV> Graph<K, VV, EV> fromTupleDataSet(DataSet<Tuple2<K, VV>> vertices,
			DataSet<Tuple3<K, K, EV>> edges, ExecutionEnvironment context) {

		DataSet<Vertex<K, VV>> vertexDataSet = vertices.map(new Tuple2ToVertexMap<K, VV>());
		DataSet<Edge<K, EV>> edgeDataSet = edges.map(new Tuple3ToEdgeMap<K, EV>());
		return fromDataSet(vertexDataSet, edgeDataSet, context);
	}

	/**
	 * Creates a graph from a DataSet of Tuple3 objects for edges.
	 *

* <p>The first field of the Tuple3 object will become the source ID,
	 * the second field will become the target ID, and the third field will become
	 * the edge value.
	 *
	 * <p>Vertices are created automatically and their values are set to NullValue.
	 *
	 * @param edges a DataSet of Tuple3 representing the edges.
	 * @param context the flink execution environment.
	 * @return the newly created graph.
	 */
	public static <K, EV> Graph<K, NullValue, EV> fromTupleDataSet(DataSet<Tuple3<K, K, EV>> edges,
			ExecutionEnvironment context) {

		DataSet<Edge<K, EV>> edgeDataSet = edges.map(new Tuple3ToEdgeMap<K, EV>());
		return fromDataSet(edgeDataSet, context);
	}

	/**
	 * Creates a graph from a DataSet of Tuple3 objects for edges.
	 *

* Each Tuple3 will become one Edge, where the source ID will be the first field of the Tuple2, * the target ID will be the second field of the Tuple2 * and the Edge value will be the third field of the Tuple3. *

* Vertices are created automatically and their values are initialized * by applying the provided vertexValueInitializer map function to the vertex IDs. * * @param edges a DataSet of Tuple3. * @param vertexValueInitializer the mapper function that initializes the vertex values. * It allows to apply a map transformation on the vertex ID to produce an initial vertex value. * @param context the flink execution environment. * @return the newly created graph. */ public static Graph fromTupleDataSet(DataSet> edges, final MapFunction vertexValueInitializer, ExecutionEnvironment context) { DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap()); return fromDataSet(edgeDataSet, vertexValueInitializer, context); } /** * Creates a graph from a DataSet of Tuple2 objects for edges. * Each Tuple2 will become one Edge, where the source ID will be the first field of the Tuple2 * and the target ID will be the second field of the Tuple2. *

* Edge value types and Vertex values types will be set to NullValue. * * @param edges a DataSet of Tuple2. * @param context the flink execution environment. * @return the newly created graph. */ public static Graph fromTuple2DataSet(DataSet> edges, ExecutionEnvironment context) { DataSet> edgeDataSet = edges.map( new MapFunction, Edge>() { public Edge map(Tuple2 input) { return new Edge(input.f0, input.f1, NullValue.getInstance()); } }).withForwardedFields("f0; f1"); return fromDataSet(edgeDataSet, context); } /** * Creates a graph from a DataSet of Tuple2 objects for edges. * Each Tuple2 will become one Edge, where the source ID will be the first field of the Tuple2 * and the target ID will be the second field of the Tuple2. *

* Edge value types will be set to NullValue. * Vertex values can be initialized by applying a user-defined map function on the vertex IDs. * * @param edges a DataSet of Tuple2, where the first field corresponds to the source ID * and the second field corresponds to the target ID. * @param vertexValueInitializer the mapper function that initializes the vertex values. * It allows to apply a map transformation on the vertex ID to produce an initial vertex value. * @param context the flink execution environment. * @return the newly created graph. */ public static Graph fromTuple2DataSet(DataSet> edges, final MapFunction vertexValueInitializer, ExecutionEnvironment context) { DataSet> edgeDataSet = edges.map( new MapFunction, Edge>() { public Edge map(Tuple2 input) { return new Edge(input.f0, input.f1, NullValue.getInstance()); } }).withForwardedFields("f0; f1"); return fromDataSet(edgeDataSet, vertexValueInitializer, context); } /** * Creates a Graph from a CSV file of vertices and a CSV file of edges. * * @param verticesPath path to a CSV file with the Vertex data. * @param edgesPath path to a CSV file with the Edge data * @param context the Flink execution environment. * @return An instance of {@link org.apache.flink.graph.GraphCsvReader}, * on which calling methods to specify types of the Vertex ID, Vertex value and Edge value returns a Graph. * * @see org.apache.flink.graph.GraphCsvReader#types(Class, Class, Class) * @see org.apache.flink.graph.GraphCsvReader#vertexTypes(Class, Class) * @see org.apache.flink.graph.GraphCsvReader#edgeTypes(Class, Class) * @see org.apache.flink.graph.GraphCsvReader#keyType(Class) */ public static GraphCsvReader fromCsvReader(String verticesPath, String edgesPath, ExecutionEnvironment context) { return new GraphCsvReader(verticesPath, edgesPath, context); } /** * Creates a graph from a CSV file of edges. Vertices will be created automatically. 
* * @param edgesPath a path to a CSV file with the Edges data * @param context the execution environment. * @return An instance of {@link org.apache.flink.graph.GraphCsvReader}, * on which calling methods to specify types of the Vertex ID, Vertex value and Edge value returns a Graph. * * @see org.apache.flink.graph.GraphCsvReader#types(Class, Class, Class) * @see org.apache.flink.graph.GraphCsvReader#vertexTypes(Class, Class) * @see org.apache.flink.graph.GraphCsvReader#edgeTypes(Class, Class) * @see org.apache.flink.graph.GraphCsvReader#keyType(Class) */ public static GraphCsvReader fromCsvReader(String edgesPath, ExecutionEnvironment context) { return new GraphCsvReader(edgesPath, context); } /** * Creates a graph from a CSV file of edges. Vertices will be created automatically and * Vertex values can be initialized using a user-defined mapper. * * @param edgesPath a path to a CSV file with the Edge data * @param vertexValueInitializer the mapper function that initializes the vertex values. * It allows to apply a map transformation on the vertex ID to produce an initial vertex value. * @param context the execution environment. * @return An instance of {@link org.apache.flink.graph.GraphCsvReader}, * on which calling methods to specify types of the Vertex ID, Vertex Value and Edge value returns a Graph. * * @see org.apache.flink.graph.GraphCsvReader#types(Class, Class, Class) * @see org.apache.flink.graph.GraphCsvReader#vertexTypes(Class, Class) * @see org.apache.flink.graph.GraphCsvReader#edgeTypes(Class, Class) * @see org.apache.flink.graph.GraphCsvReader#keyType(Class) */ public static GraphCsvReader fromCsvReader(String edgesPath, final MapFunction vertexValueInitializer, ExecutionEnvironment context) { return new GraphCsvReader(edgesPath, vertexValueInitializer, context); } /** * @return the flink execution environment. 
*/ public ExecutionEnvironment getContext() { return this.context; } /** * Function that checks whether a Graph is a valid Graph, * as defined by the given {@link GraphValidator}. * * @return true if the Graph is valid. */ public Boolean validate(GraphValidator validator) throws Exception { return validator.validate(this); } /** * @return the vertex DataSet. */ public DataSet> getVertices() { return vertices; } /** * @return the edge DataSet. */ public DataSet> getEdges() { return edges; } /** * @return the vertex DataSet as Tuple2. */ public DataSet> getVerticesAsTuple2() { return vertices.map(new VertexToTuple2Map()); } /** * @return the edge DataSet as Tuple3. */ public DataSet> getEdgesAsTuple3() { return edges.map(new EdgeToTuple3Map()); } /** * This method allows access to the graph's edge values along with its source and target vertex values. * * @return a triplet DataSet consisting of (srcVertexId, trgVertexId, srcVertexValue, trgVertexValue, edgeValue) */ public DataSet> getTriplets() { return this.getVertices().join(this.getEdges()).where(0).equalTo(0) .with(new ProjectEdgeWithSrcValue()) .join(this.getVertices()).where(1).equalTo(0) .with(new ProjectEdgeWithVertexValues()); } @ForwardedFieldsFirst("f1->f2") @ForwardedFieldsSecond("f0; f1; f2->f3") private static final class ProjectEdgeWithSrcValue implements FlatJoinFunction, Edge, Tuple4> { @Override public void join(Vertex vertex, Edge edge, Collector> collector) throws Exception { collector.collect(new Tuple4(edge.getSource(), edge.getTarget(), vertex.getValue(), edge.getValue())); } } @ForwardedFieldsFirst("f0; f1; f2; f3->f4") @ForwardedFieldsSecond("f1->f3") private static final class ProjectEdgeWithVertexValues implements FlatJoinFunction, Vertex, Triplet> { @Override public void join(Tuple4 tripletWithSrcValSet, Vertex vertex, Collector> collector) throws Exception { collector.collect(new Triplet(tripletWithSrcValSet.f0, tripletWithSrcValSet.f1, tripletWithSrcValSet.f2, vertex.getValue(), 
tripletWithSrcValSet.f3)); } } /** * Apply a function to the attribute of each vertex in the graph. * * @param mapper the map function to apply. * @return a new graph */ @SuppressWarnings({ "unchecked", "rawtypes" }) public Graph mapVertices(final MapFunction, NV> mapper) { TypeInformation keyType = ((TupleTypeInfo) vertices.getType()).getTypeAt(0); TypeInformation valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, null, null); TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo( Vertex.class, keyType, valueType); return mapVertices(mapper, returnType); } /** * Apply a function to the attribute of each vertex in the graph. * * @param mapper the map function to apply. * @param returnType the explicit return type. * @return a new graph */ public Graph mapVertices(final MapFunction, NV> mapper, TypeInformation> returnType) { DataSet> mappedVertices = vertices.map( new MapFunction, Vertex>() { public Vertex map(Vertex value) throws Exception { return new Vertex(value.f0, mapper.map(value)); } }) .returns(returnType) .withForwardedFields("f0"); return new Graph(mappedVertices, this.edges, this.context); } /** * Apply a function to the attribute of each edge in the graph. * * @param mapper the map function to apply. * @return a new graph */ @SuppressWarnings({ "unchecked", "rawtypes" }) public Graph mapEdges(final MapFunction, NV> mapper) { TypeInformation keyType = ((TupleTypeInfo) edges.getType()).getTypeAt(0); TypeInformation valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, null, null); TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo( Edge.class, keyType, keyType, valueType); return mapEdges(mapper, returnType); } /** * Translate {@link Vertex} and {@link Edge} IDs using the given {@link MapFunction}. 
* * @param translator implements conversion from {@code K} to {@code NEW} * @param new ID type * @return graph with translated vertex and edge IDs * @throws Exception */ public Graph translateGraphIds(TranslateFunction translator) throws Exception { return run(new TranslateGraphIds(translator)); } /** * Translate {@link Vertex} values using the given {@link MapFunction}. * * @param translator implements conversion from {@code VV} to {@code NEW} * @param new vertex value type * @return graph with translated vertex values * @throws Exception */ public Graph translateVertexValues(TranslateFunction translator) throws Exception { return run(new TranslateVertexValues(translator)); } /** * Translate {@link Edge} values using the given {@link MapFunction}. * * @param translator implements conversion from {@code EV} to {@code NEW} * @param new edge value type * @return graph with translated edge values * @throws Exception */ public Graph translateEdgeValues(TranslateFunction translator) throws Exception { return run(new TranslateEdgeValues(translator)); } /** * Apply a function to the attribute of each edge in the graph. * * @param mapper the map function to apply. * @param returnType the explicit return type. * @return a new graph */ public Graph mapEdges(final MapFunction, NV> mapper, TypeInformation> returnType) { DataSet> mappedEdges = edges.map( new MapFunction, Edge>() { public Edge map(Edge value) throws Exception { return new Edge(value.f0, value.f1, mapper .map(value)); } }) .returns(returnType) .withForwardedFields("f0; f1"); return new Graph(this.vertices, mappedEdges, this.context); } /** * Joins the vertex DataSet of this graph with an input Tuple2 DataSet and applies * a user-defined transformation on the values of the matched records. * The vertex ID and the first field of the Tuple2 DataSet are used as the join keys. * * @param inputDataSet the Tuple2 DataSet to join with. 
* The first field of the Tuple2 is used as the join key and the second field is passed * as a parameter to the transformation function. * @param vertexJoinFunction the transformation function to apply. * The first parameter is the current vertex value and the second parameter is the value * of the matched Tuple2 from the input DataSet. * @return a new Graph, where the vertex values have been updated according to the * result of the vertexJoinFunction. * * @param the type of the second field of the input Tuple2 DataSet. */ public Graph joinWithVertices(DataSet> inputDataSet, final VertexJoinFunction vertexJoinFunction) { DataSet> resultedVertices = this.getVertices() .coGroup(inputDataSet).where(0).equalTo(0) .with(new ApplyCoGroupToVertexValues(vertexJoinFunction)); return new Graph(resultedVertices, this.edges, this.context); } private static final class ApplyCoGroupToVertexValues implements CoGroupFunction, Tuple2, Vertex> { private VertexJoinFunction vertexJoinFunction; public ApplyCoGroupToVertexValues(VertexJoinFunction mapper) { this.vertexJoinFunction = mapper; } @Override public void coGroup(Iterable> vertices, Iterable> input, Collector> collector) throws Exception { final Iterator> vertexIterator = vertices.iterator(); final Iterator> inputIterator = input.iterator(); if (vertexIterator.hasNext()) { if (inputIterator.hasNext()) { final Tuple2 inputNext = inputIterator.next(); collector.collect(new Vertex(inputNext.f0, vertexJoinFunction .vertexJoin(vertexIterator.next().f1, inputNext.f1))); } else { collector.collect(vertexIterator.next()); } } } } /** * Joins the edge DataSet with an input DataSet on the composite key of both * source and target IDs and applies a user-defined transformation on the values * of the matched records. The first two fields of the input DataSet are used as join keys. * * @param inputDataSet the DataSet to join with. 
* The first two fields of the Tuple3 are used as the composite join key * and the third field is passed as a parameter to the transformation function. * @param edgeJoinFunction the transformation function to apply. * The first parameter is the current edge value and the second parameter is the value * of the matched Tuple3 from the input DataSet. * @param the type of the third field of the input Tuple3 DataSet. * @return a new Graph, where the edge values have been updated according to the * result of the edgeJoinFunction. */ public Graph joinWithEdges(DataSet> inputDataSet, final EdgeJoinFunction edgeJoinFunction) { DataSet> resultedEdges = this.getEdges() .coGroup(inputDataSet).where(0, 1).equalTo(0, 1) .with(new ApplyCoGroupToEdgeValues(edgeJoinFunction)); return new Graph(this.vertices, resultedEdges, this.context); } private static final class ApplyCoGroupToEdgeValues implements CoGroupFunction, Tuple3, Edge> { private EdgeJoinFunction edgeJoinFunction; public ApplyCoGroupToEdgeValues(EdgeJoinFunction mapper) { this.edgeJoinFunction = mapper; } @Override public void coGroup(Iterable> edges, Iterable> input, Collector> collector) throws Exception { final Iterator> edgesIterator = edges.iterator(); final Iterator> inputIterator = input.iterator(); if (edgesIterator.hasNext()) { if (inputIterator.hasNext()) { final Tuple3 inputNext = inputIterator.next(); collector.collect(new Edge(inputNext.f0, inputNext.f1, edgeJoinFunction.edgeJoin( edgesIterator.next().f2, inputNext.f2))); } else { collector.collect(edgesIterator.next()); } } } } /** * Joins the edge DataSet with an input Tuple2 DataSet and applies a user-defined transformation * on the values of the matched records. * The source ID of the edges input and the first field of the input DataSet are used as join keys. * * @param inputDataSet the DataSet to join with. * The first field of the Tuple2 is used as the join key * and the second field is passed as a parameter to the transformation function. 
* @param edgeJoinFunction the transformation function to apply. * The first parameter is the current edge value and the second parameter is the value * of the matched Tuple2 from the input DataSet. * @param the type of the second field of the input Tuple2 DataSet. * @return a new Graph, where the edge values have been updated according to the * result of the edgeJoinFunction. */ public Graph joinWithEdgesOnSource(DataSet> inputDataSet, final EdgeJoinFunction edgeJoinFunction) { DataSet> resultedEdges = this.getEdges() .coGroup(inputDataSet).where(0).equalTo(0) .with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(edgeJoinFunction)); return new Graph(this.vertices, resultedEdges, this.context); } private static final class ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget implements CoGroupFunction, Tuple2, Edge> { private EdgeJoinFunction edgeJoinFunction; public ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(EdgeJoinFunction mapper) { this.edgeJoinFunction = mapper; } @Override public void coGroup(Iterable> edges, Iterable> input, Collector> collector) throws Exception { final Iterator> edgesIterator = edges.iterator(); final Iterator> inputIterator = input.iterator(); if (inputIterator.hasNext()) { final Tuple2 inputNext = inputIterator.next(); while (edgesIterator.hasNext()) { Edge edgesNext = edgesIterator.next(); collector.collect(new Edge(edgesNext.f0, edgesNext.f1, edgeJoinFunction.edgeJoin(edgesNext.f2, inputNext.f1))); } } else { while (edgesIterator.hasNext()) { collector.collect(edgesIterator.next()); } } } } /** * Joins the edge DataSet with an input Tuple2 DataSet and applies a user-defined transformation * on the values of the matched records. * The target ID of the edges input and the first field of the input DataSet are used as join keys. * * @param inputDataSet the DataSet to join with. * The first field of the Tuple2 is used as the join key * and the second field is passed as a parameter to the transformation function. 
* @param edgeJoinFunction the transformation function to apply. * The first parameter is the current edge value and the second parameter is the value * of the matched Tuple2 from the input DataSet. * @param the type of the second field of the input Tuple2 DataSet. * @return a new Graph, where the edge values have been updated according to the * result of the edgeJoinFunction. */ public Graph joinWithEdgesOnTarget(DataSet> inputDataSet, final EdgeJoinFunction edgeJoinFunction) { DataSet> resultedEdges = this.getEdges() .coGroup(inputDataSet).where(1).equalTo(0) .with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(edgeJoinFunction)); return new Graph(this.vertices, resultedEdges, this.context); } /** * Apply filtering functions to the graph and return a sub-graph that * satisfies the predicates for both vertices and edges. * * @param vertexFilter the filter function for vertices. * @param edgeFilter the filter function for edges. * @return the resulting sub-graph. */ public Graph subgraph(FilterFunction> vertexFilter, FilterFunction> edgeFilter) { DataSet> filteredVertices = this.vertices.filter(vertexFilter); DataSet> remainingEdges = this.edges.join(filteredVertices) .where(0).equalTo(0).with(new ProjectEdge()) .join(filteredVertices).where(1).equalTo(0) .with(new ProjectEdge()); DataSet> filteredEdges = remainingEdges.filter(edgeFilter); return new Graph(filteredVertices, filteredEdges, this.context); } /** * Apply a filtering function to the graph and return a sub-graph that * satisfies the predicates only for the vertices. * * @param vertexFilter the filter function for vertices. * @return the resulting sub-graph. 
*/ public Graph filterOnVertices(FilterFunction> vertexFilter) { DataSet> filteredVertices = this.vertices.filter(vertexFilter); DataSet> remainingEdges = this.edges.join(filteredVertices) .where(0).equalTo(0).with(new ProjectEdge()) .join(filteredVertices).where(1).equalTo(0) .with(new ProjectEdge()); return new Graph(filteredVertices, remainingEdges, this.context); } /** * Apply a filtering function to the graph and return a sub-graph that * satisfies the predicates only for the edges. * * @param edgeFilter the filter function for edges. * @return the resulting sub-graph. */ public Graph filterOnEdges(FilterFunction> edgeFilter) { DataSet> filteredEdges = this.edges.filter(edgeFilter); return new Graph(this.vertices, filteredEdges, this.context); } @ForwardedFieldsFirst("f0; f1; f2") private static final class ProjectEdge implements FlatJoinFunction< Edge, Vertex, Edge> { public void join(Edge first, Vertex second, Collector> out) { out.collect(first); } } /** * Return the out-degree of all vertices in the graph * * @return A DataSet of {@code Tuple2} */ public DataSet> outDegrees() { return vertices.coGroup(edges).where(0).equalTo(0).with(new CountNeighborsCoGroup()); } private static final class CountNeighborsCoGroup implements CoGroupFunction, Edge, Tuple2> { private LongValue degree = new LongValue(); private Tuple2 vertexDegree = new Tuple2<>(null, degree); @SuppressWarnings("unused") public void coGroup(Iterable> vertex, Iterable> outEdges, Collector> out) { long count = 0; for (Edge edge : outEdges) { count++; } degree.setValue(count); Iterator> vertexIterator = vertex.iterator(); if(vertexIterator.hasNext()) { vertexDegree.f0 = vertexIterator.next().f0; out.collect(vertexDegree); } else { throw new NoSuchElementException("The edge src/trg id could not be found within the vertexIds"); } } } /** * Return the in-degree of all vertices in the graph * * @return A DataSet of {@code Tuple2} */ public DataSet> inDegrees() { return 
vertices.coGroup(edges).where(0).equalTo(1).with(new CountNeighborsCoGroup()); } /** * Return the degree of all vertices in the graph * * @return A DataSet of {@code Tuple2} */ public DataSet> getDegrees() { return outDegrees().union(inDegrees()).groupBy(0).sum(1); } /** * This operation adds all inverse-direction edges to the graph. * * @return the undirected graph. */ public Graph getUndirected() { DataSet> undirectedEdges = edges.flatMap(new RegularAndReversedEdgesMap()); return new Graph(vertices, undirectedEdges, this.context); } /** * Groups by vertex and computes a GroupReduce transformation over the edge values of each vertex. * The edgesFunction applied on the edges has access to both the id and the value * of the grouping vertex. * * For each vertex, the edgesFunction can iterate over all edges of this vertex * with the specified direction, and emit any number of output elements, including none. * * @param edgesFunction the group reduce function to apply to the neighboring edges of each vertex. * @param direction the edge direction (in-, out-, all-). * @param the output type * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnEdges(EdgesFunctionWithVertexValue edgesFunction, EdgeDirection direction) throws IllegalArgumentException { switch (direction) { case IN: return vertices.coGroup(edges).where(0).equalTo(1) .with(new ApplyCoGroupFunction(edgesFunction)); case OUT: return vertices.coGroup(edges).where(0).equalTo(0) .with(new ApplyCoGroupFunction(edgesFunction)); case ALL: return vertices.coGroup(edges.flatMap(new EmitOneEdgePerNode())) .where(0).equalTo(0).with(new ApplyCoGroupFunctionOnAllEdges(edgesFunction)); default: throw new IllegalArgumentException("Illegal edge direction"); } } /** * Groups by vertex and computes a GroupReduce transformation over the edge values of each vertex. 
* The edgesFunction applied on the edges has access to both the id and the value * of the grouping vertex. * * For each vertex, the edgesFunction can iterate over all edges of this vertex * with the specified direction, and emit any number of output elements, including none. * * @param edgesFunction the group reduce function to apply to the neighboring edges of each vertex. * @param direction the edge direction (in-, out-, all-). * @param the output type * @param typeInfo the explicit return type. * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnEdges(EdgesFunctionWithVertexValue edgesFunction, EdgeDirection direction, TypeInformation typeInfo) throws IllegalArgumentException { switch (direction) { case IN: return vertices.coGroup(edges).where(0).equalTo(1) .with(new ApplyCoGroupFunction(edgesFunction)).returns(typeInfo); case OUT: return vertices.coGroup(edges).where(0).equalTo(0) .with(new ApplyCoGroupFunction(edgesFunction)).returns(typeInfo); case ALL: return vertices.coGroup(edges.flatMap(new EmitOneEdgePerNode())) .where(0).equalTo(0).with(new ApplyCoGroupFunctionOnAllEdges(edgesFunction)).returns(typeInfo); default: throw new IllegalArgumentException("Illegal edge direction"); } } /** * Groups by vertex and computes a GroupReduce transformation over the edge values of each vertex. * The edgesFunction applied on the edges only has access to the vertex id (not the vertex value) * of the grouping vertex. * * For each vertex, the edgesFunction can iterate over all edges of this vertex * with the specified direction, and emit any number of output elements, including none. * * @param edgesFunction the group reduce function to apply to the neighboring edges of each vertex. * @param direction the edge direction (in-, out-, all-). 
* @param the output type * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnEdges(EdgesFunction edgesFunction, EdgeDirection direction) throws IllegalArgumentException { switch (direction) { case IN: return edges.map(new ProjectVertexIdMap(1)) .withForwardedFields("f1->f0") .groupBy(0).reduceGroup(new ApplyGroupReduceFunction(edgesFunction)); case OUT: return edges.map(new ProjectVertexIdMap(0)) .withForwardedFields("f0") .groupBy(0).reduceGroup(new ApplyGroupReduceFunction(edgesFunction)); case ALL: return edges.flatMap(new EmitOneEdgePerNode()) .groupBy(0).reduceGroup(new ApplyGroupReduceFunction(edgesFunction)); default: throw new IllegalArgumentException("Illegal edge direction"); } } /** * Groups by vertex and computes a GroupReduce transformation over the edge values of each vertex. * The edgesFunction applied on the edges only has access to the vertex id (not the vertex value) * of the grouping vertex. * * For each vertex, the edgesFunction can iterate over all edges of this vertex * with the specified direction, and emit any number of output elements, including none. * * @param edgesFunction the group reduce function to apply to the neighboring edges of each vertex. * @param direction the edge direction (in-, out-, all-). * @param the output type * @param typeInfo the explicit return type. 
* @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnEdges(EdgesFunction edgesFunction, EdgeDirection direction, TypeInformation typeInfo) throws IllegalArgumentException { switch (direction) { case IN: return edges.map(new ProjectVertexIdMap(1)) .withForwardedFields("f1->f0") .groupBy(0).reduceGroup(new ApplyGroupReduceFunction(edgesFunction)).returns(typeInfo); case OUT: return edges.map(new ProjectVertexIdMap(0)) .withForwardedFields("f0") .groupBy(0).reduceGroup(new ApplyGroupReduceFunction(edgesFunction)).returns(typeInfo); case ALL: return edges.flatMap(new EmitOneEdgePerNode()) .groupBy(0).reduceGroup(new ApplyGroupReduceFunction(edgesFunction)).returns(typeInfo); default: throw new IllegalArgumentException("Illegal edge direction"); } } private static final class ProjectVertexIdMap implements MapFunction< Edge, Tuple2>> { private int fieldPosition; public ProjectVertexIdMap(int position) { this.fieldPosition = position; } @SuppressWarnings("unchecked") public Tuple2> map(Edge edge) { return new Tuple2>((K) edge.getField(fieldPosition), edge); } } private static final class ProjectVertexWithEdgeValueMap implements MapFunction< Edge, Tuple2> { private int fieldPosition; public ProjectVertexWithEdgeValueMap(int position) { this.fieldPosition = position; } @SuppressWarnings("unchecked") public Tuple2 map(Edge edge) { return new Tuple2((K) edge.getField(fieldPosition), edge.getValue()); } } private static final class ApplyGroupReduceFunction implements GroupReduceFunction< Tuple2>, T>, ResultTypeQueryable { private EdgesFunction function; public ApplyGroupReduceFunction(EdgesFunction fun) { this.function = fun; } public void reduce(Iterable>> edges, Collector out) throws Exception { function.iterateEdges(edges, out); } @Override public TypeInformation getProducedType() { return TypeExtractor.createTypeInfo(EdgesFunction.class, function.getClass(), 2, null, null); } } private static final class 
EmitOneEdgePerNode implements FlatMapFunction< Edge, Tuple2>> { public void flatMap(Edge edge, Collector>> out) { out.collect(new Tuple2>(edge.getSource(), edge)); out.collect(new Tuple2>(edge.getTarget(), edge)); } } private static final class EmitOneVertexWithEdgeValuePerNode implements FlatMapFunction< Edge, Tuple2> { public void flatMap(Edge edge, Collector> out) { out.collect(new Tuple2(edge.getSource(), edge.getValue())); out.collect(new Tuple2(edge.getTarget(), edge.getValue())); } } private static final class EmitOneEdgeWithNeighborPerNode implements FlatMapFunction< Edge, Tuple3>> { public void flatMap(Edge edge, Collector>> out) { out.collect(new Tuple3>(edge.getSource(), edge.getTarget(), edge)); out.collect(new Tuple3>(edge.getTarget(), edge.getSource(), edge)); } } private static final class ApplyCoGroupFunction implements CoGroupFunction< Vertex, Edge, T>, ResultTypeQueryable { private EdgesFunctionWithVertexValue function; public ApplyCoGroupFunction(EdgesFunctionWithVertexValue fun) { this.function = fun; } public void coGroup(Iterable> vertex, Iterable> edges, Collector out) throws Exception { Iterator> vertexIterator = vertex.iterator(); if(vertexIterator.hasNext()) { function.iterateEdges(vertexIterator.next(), edges, out); } else { throw new NoSuchElementException("The edge src/trg id could not be found within the vertexIds"); } } @Override public TypeInformation getProducedType() { return TypeExtractor.createTypeInfo(EdgesFunctionWithVertexValue.class, function.getClass(), 3, null, null); } } private static final class ApplyCoGroupFunctionOnAllEdges implements CoGroupFunction, Tuple2>, T>, ResultTypeQueryable { private EdgesFunctionWithVertexValue function; public ApplyCoGroupFunctionOnAllEdges(EdgesFunctionWithVertexValue fun) { this.function = fun; } public void coGroup(Iterable> vertex, final Iterable>> keysWithEdges, Collector out) throws Exception { final Iterator> edgesIterator = new Iterator>() { final Iterator>> keysWithEdgesIterator = 
keysWithEdges.iterator(); @Override public boolean hasNext() { return keysWithEdgesIterator.hasNext(); } @Override public Edge next() { return keysWithEdgesIterator.next().f1; } @Override public void remove() { keysWithEdgesIterator.remove(); } }; Iterable> edgesIterable = new Iterable>() { public Iterator> iterator() { return edgesIterator; } }; Iterator> vertexIterator = vertex.iterator(); if(vertexIterator.hasNext()) { function.iterateEdges(vertexIterator.next(), edgesIterable, out); } else { throw new NoSuchElementException("The edge src/trg id could not be found within the vertexIds"); } } @Override public TypeInformation getProducedType() { return TypeExtractor.createTypeInfo(EdgesFunctionWithVertexValue.class, function.getClass(), 3, null, null); } } @ForwardedFields("f0->f1; f1->f0; f2") private static final class ReverseEdgesMap implements MapFunction, Edge> { public Edge map(Edge value) { return new Edge(value.f1, value.f0, value.f2); } } private static final class RegularAndReversedEdgesMap implements FlatMapFunction, Edge> { @Override public void flatMap(Edge edge, Collector> out) throws Exception { out.collect(new Edge(edge.f0, edge.f1, edge.f2)); out.collect(new Edge(edge.f1, edge.f0, edge.f2)); } } /** * Reverse the direction of the edges in the graph * * @return a new graph with all edges reversed * @throws UnsupportedOperationException */ public Graph reverse() throws UnsupportedOperationException { DataSet> reversedEdges = edges.map(new ReverseEdgesMap()); return new Graph(vertices, reversedEdges, this.context); } /** * @return a long integer representing the number of vertices */ public long numberOfVertices() throws Exception { return vertices.count(); } /** * @return a long integer representing the number of edges */ public long numberOfEdges() throws Exception { return edges.count(); } /** * @return The IDs of the vertices as DataSet */ public DataSet getVertexIds() { return vertices.map(new ExtractVertexIDMapper()); } private static final 
class ExtractVertexIDMapper implements MapFunction, K> { @Override public K map(Vertex vertex) { return vertex.f0; } } /** * @return The IDs of the edges as DataSet */ public DataSet> getEdgeIds() { return edges.map(new ExtractEdgeIDsMapper()); } @ForwardedFields("f0; f1") private static final class ExtractEdgeIDsMapper implements MapFunction, Tuple2> { @Override public Tuple2 map(Edge edge) throws Exception { return new Tuple2(edge.f0, edge.f1); } } /** * Adds the input vertex to the graph. If the vertex already * exists in the graph, it will not be added again. * * @param vertex the vertex to be added * @return the new graph containing the existing vertices as well as the one just added */ public Graph addVertex(final Vertex vertex) { List> newVertex = new ArrayList>(); newVertex.add(vertex); return addVertices(newVertex); } /** * Adds the list of vertices, passed as input, to the graph. * If the vertices already exist in the graph, they will not be added once more. * * @param verticesToAdd the list of vertices to add * @return the new graph containing the existing and newly added vertices */ public Graph addVertices(List> verticesToAdd) { // Add the vertices DataSet> newVertices = this.vertices.coGroup(this.context.fromCollection(verticesToAdd)) .where(0).equalTo(0).with(new VerticesUnionCoGroup()); return new Graph<>(newVertices, this.edges, this.context); } private static final class VerticesUnionCoGroup implements CoGroupFunction, Vertex, Vertex> { @Override public void coGroup(Iterable> oldVertices, Iterable> newVertices, Collector> out) throws Exception { final Iterator> oldVerticesIterator = oldVertices.iterator(); final Iterator> newVerticesIterator = newVertices.iterator(); // if there is both an old vertex and a new vertex then only the old vertex is emitted if (oldVerticesIterator.hasNext()) { out.collect(oldVerticesIterator.next()); } else { out.collect(newVerticesIterator.next()); } } } /** * Adds the given edge to the graph. 
If the source and target vertices do * not exist in the graph, they will also be added. * * @param source the source vertex of the edge * @param target the target vertex of the edge * @param edgeValue the edge value * @return the new graph containing the existing vertices and edges plus the * newly added edge */ public Graph addEdge(Vertex source, Vertex target, EV edgeValue) { Graph partialGraph = fromCollection(Arrays.asList(source, target), Arrays.asList(new Edge(source.f0, target.f0, edgeValue)), this.context); return this.union(partialGraph); } /** * Adds the given list edges to the graph. * * When adding an edge for a non-existing set of vertices, the edge is considered invalid and ignored. * * @param newEdges the data set of edges to be added * @return a new graph containing the existing edges plus the newly added edges. */ public Graph addEdges(List> newEdges) { DataSet> newEdgesDataSet = this.context.fromCollection(newEdges); DataSet> validNewEdges = this.getVertices().join(newEdgesDataSet) .where(0).equalTo(0) .with(new JoinVerticesWithEdgesOnSrc()) .join(this.getVertices()).where(1).equalTo(0) .with(new JoinWithVerticesOnTrg()); return Graph.fromDataSet(this.vertices, this.edges.union(validNewEdges), this.context); } @ForwardedFieldsSecond("f0; f1; f2") private static final class JoinVerticesWithEdgesOnSrc implements JoinFunction, Edge, Edge> { @Override public Edge join(Vertex vertex, Edge edge) throws Exception { return edge; } } @ForwardedFieldsFirst("f0; f1; f2") private static final class JoinWithVerticesOnTrg implements JoinFunction, Vertex, Edge> { @Override public Edge join(Edge edge, Vertex vertex) throws Exception { return edge; } } /** * Removes the given vertex and its edges from the graph. 
* * @param vertex the vertex to remove * @return the new graph containing the existing vertices and edges without * the removed vertex and its edges */ public Graph removeVertex(Vertex vertex) { List> vertexToBeRemoved = new ArrayList>(); vertexToBeRemoved.add(vertex); return removeVertices(vertexToBeRemoved); } /** * Removes the given list of vertices and its edges from the graph. * * @param verticesToBeRemoved the list of vertices to be removed * @return the resulted graph containing the initial vertices and edges minus the vertices * and edges removed. */ public Graph removeVertices(List> verticesToBeRemoved) { return removeVertices(this.context.fromCollection(verticesToBeRemoved)); } /** * Removes the given list of vertices and its edges from the graph. * * @param verticesToBeRemoved the DataSet of vertices to be removed * @return the resulted graph containing the initial vertices and edges minus the vertices * and edges removed. */ private Graph removeVertices(DataSet> verticesToBeRemoved) { DataSet> newVertices = getVertices().coGroup(verticesToBeRemoved).where(0).equalTo(0) .with(new VerticesRemovalCoGroup()); DataSet > newEdges = newVertices.join(getEdges()).where(0).equalTo(0) // if the edge source was removed, the edge will also be removed .with(new ProjectEdgeToBeRemoved()) // if the edge target was removed, the edge will also be removed .join(newVertices).where(1).equalTo(0) .with(new ProjectEdge()); return new Graph(newVertices, newEdges, context); } private static final class VerticesRemovalCoGroup implements CoGroupFunction, Vertex, Vertex> { @Override public void coGroup(Iterable> vertex, Iterable> vertexToBeRemoved, Collector> out) throws Exception { final Iterator> vertexIterator = vertex.iterator(); final Iterator> vertexToBeRemovedIterator = vertexToBeRemoved.iterator(); Vertex next; if (vertexIterator.hasNext()) { if (!vertexToBeRemovedIterator.hasNext()) { next = vertexIterator.next(); out.collect(next); } } } } @ForwardedFieldsSecond("f0; f1; 
f2") private static final class ProjectEdgeToBeRemoved implements JoinFunction, Edge, Edge> { @Override public Edge join(Vertex vertex, Edge edge) throws Exception { return edge; } } /** * Removes all edges that match the given edge from the graph. * * @param edge the edge to remove * @return the new graph containing the existing vertices and edges without * the removed edges */ public Graph removeEdge(Edge edge) { DataSet> newEdges = getEdges().filter(new EdgeRemovalEdgeFilter<>(edge)); return new Graph<>(this.vertices, newEdges, this.context); } private static final class EdgeRemovalEdgeFilter implements FilterFunction> { private Edge edgeToRemove; public EdgeRemovalEdgeFilter(Edge edge) { edgeToRemove = edge; } @Override public boolean filter(Edge edge) { return (!(edge.f0.equals(edgeToRemove.f0) && edge.f1 .equals(edgeToRemove.f1))); } } /** * Removes all the edges that match the edges in the given data set from the graph. * * @param edgesToBeRemoved the list of edges to be removed * @return a new graph where the edges have been removed and in which the vertices remained intact */ public Graph removeEdges(List> edgesToBeRemoved) { DataSet> newEdges = getEdges().coGroup(this.context.fromCollection(edgesToBeRemoved)) .where(0,1).equalTo(0,1).with(new EdgeRemovalCoGroup()); return new Graph(this.vertices, newEdges, context); } private static final class EdgeRemovalCoGroup implements CoGroupFunction, Edge, Edge> { @Override public void coGroup(Iterable> edge, Iterable> edgeToBeRemoved, Collector> out) throws Exception { if (!edgeToBeRemoved.iterator().hasNext()) { for (Edge next : edge) { out.collect(next); } } } } /** * Performs union on the vertices and edges sets of the input graphs * removing duplicate vertices but maintaining duplicate edges. 
* * @param graph the graph to perform union with * @return a new graph */ public Graph union(Graph graph) { DataSet> unionedVertices = graph.getVertices().union(this.getVertices()).distinct(); DataSet> unionedEdges = graph.getEdges().union(this.getEdges()); return new Graph(unionedVertices, unionedEdges, this.context); } /** * Performs Difference on the vertex and edge sets of the input graphs * removes common vertices and edges. If a source/target vertex is removed, * its corresponding edge will also be removed * * @param graph the graph to perform difference with * @return a new graph where the common vertices and edges have been removed */ public Graph difference(Graph graph) { DataSet> removeVerticesData = graph.getVertices(); return this.removeVertices(removeVerticesData); } /** * Performs intersect on the edge sets of the input graphs. Edges are considered equal, if they * have the same source identifier, target identifier and edge value. *

* The method computes pairs of equal edges from the input graphs. If the same edge occurs * multiple times in the input graphs, there will be multiple edge pairs to be considered. Each * edge instance can only be part of one pair. If the given parameter {@code distinctEdges} is set * to {@code true}, there will be exactly one edge in the output graph representing all pairs of * equal edges. If the parameter is set to {@code false}, both edges of each pair will be in the * output. *

* Vertices in the output graph will have no vertex values. * * @param graph the graph to perform intersect with * @param distinctEdges if set to {@code true}, there will be exactly one edge in the output graph * representing all pairs of equal edges, otherwise, for each pair, both * edges will be in the output graph * @return a new graph which contains only common vertices and edges from the input graphs */ public Graph intersect(Graph graph, boolean distinctEdges) { DataSet> intersectEdges; if (distinctEdges) { intersectEdges = getDistinctEdgeIntersection(graph.getEdges()); } else { intersectEdges = getPairwiseEdgeIntersection(graph.getEdges()); } return Graph.fromDataSet(intersectEdges, getContext()); } /** * Computes the intersection between the edge set and the given edge set. For all matching pairs, * only one edge will be in the resulting data set. * * @param edges edges to compute intersection with * @return edge set containing one edge for all matching pairs of the same edge */ private DataSet> getDistinctEdgeIntersection(DataSet> edges) { return this.getEdges() .join(edges) .where(0, 1, 2) .equalTo(0, 1, 2) .with(new JoinFunction, Edge, Edge>() { @Override public Edge join(Edge first, Edge second) throws Exception { return first; } }).withForwardedFieldsFirst("*") .distinct(); } /** * Computes the intersection between the edge set and the given edge set. For all matching pairs, both edges will be * in the resulting data set. * * @param edges edges to compute intersection with * @return edge set containing both edges from all matching pairs of the same edge */ private DataSet> getPairwiseEdgeIntersection(DataSet> edges) { return this.getEdges() .coGroup(edges) .where(0, 1, 2) .equalTo(0, 1, 2) .with(new MatchingEdgeReducer()); } /** * As long as both input iterables have more edges, the reducer outputs each edge of a pair. 
* * @param vertex identifier type * @param edge value type */ private static final class MatchingEdgeReducer implements CoGroupFunction, Edge, Edge> { @Override public void coGroup(Iterable> edgesLeft, Iterable> edgesRight, Collector> out) throws Exception { Iterator> leftIt = edgesLeft.iterator(); Iterator> rightIt = edgesRight.iterator(); // collect pairs once while(leftIt.hasNext() && rightIt.hasNext()) { out.collect(leftIt.next()); out.collect(rightIt.next()); } } } /** * Runs a ScatterGather iteration on the graph. * No configuration options are provided. * * @param scatterFunction the scatter function * @param gatherFunction the gather function * @param maximumNumberOfIterations maximum number of iterations to perform * * @return the updated Graph after the scatter-gather iteration has converged or * after maximumNumberOfIterations. */ public Graph runScatterGatherIteration( ScatterFunction scatterFunction, org.apache.flink.graph.spargel.GatherFunction gatherFunction, int maximumNumberOfIterations) { return this.runScatterGatherIteration(scatterFunction, gatherFunction, maximumNumberOfIterations, null); } /** * Runs a ScatterGather iteration on the graph with configuration options. * * @param scatterFunction the scatter function * @param gatherFunction the gather function * @param maximumNumberOfIterations maximum number of iterations to perform * @param parameters the iteration configuration parameters * * @return the updated Graph after the scatter-gather iteration has converged or * after maximumNumberOfIterations. 
*/
    public <M> Graph<K, VV, EV> runScatterGatherIteration(
            ScatterFunction<K, VV, M, EV> scatterFunction,
            org.apache.flink.graph.spargel.GatherFunction<K, VV, M> gatherFunction,
            int maximumNumberOfIterations, ScatterGatherConfiguration parameters) {

        ScatterGatherIteration<K, VV, M, EV> iteration = ScatterGatherIteration.withEdges(
                edges, scatterFunction, gatherFunction, maximumNumberOfIterations);

        iteration.configure(parameters);

        DataSet<Vertex<K, VV>> newVertices = this.getVertices().runOperation(iteration);

        // only the vertex values change; the edge set is carried over as is
        return new Graph<>(newVertices, this.edges, this.context);
    }

    /**
     * Runs a Gather-Sum-Apply iteration on the graph.
     * No configuration options are provided.
     *
     * @param gatherFunction the gather function collects information about adjacent vertices and edges
     * @param sumFunction the sum function aggregates the gathered information
     * @param applyFunction the apply function updates the vertex values with the aggregates
     * @param maximumNumberOfIterations maximum number of iterations to perform
     * @param <M> the intermediate type used between gather, sum and apply
     *
     * @return the updated Graph after the gather-sum-apply iteration has converged or
     *         after maximumNumberOfIterations.
     */
    public <M> Graph<K, VV, EV> runGatherSumApplyIteration(
            org.apache.flink.graph.gsa.GatherFunction<VV, EV, M> gatherFunction,
            SumFunction<VV, EV, M> sumFunction,
            ApplyFunction<K, VV, M> applyFunction, int maximumNumberOfIterations) {

        // delegate to the configurable overload with no configuration
        return this.runGatherSumApplyIteration(gatherFunction, sumFunction, applyFunction,
                maximumNumberOfIterations, null);
    }

    /**
     * Runs a Gather-Sum-Apply iteration on the graph with configuration options.
* * @param gatherFunction the gather function collects information about adjacent vertices and edges * @param sumFunction the sum function aggregates the gathered information * @param applyFunction the apply function updates the vertex values with the aggregates * @param maximumNumberOfIterations maximum number of iterations to perform * @param parameters the iteration configuration parameters * @param the intermediate type used between gather, sum and apply * * @return the updated Graph after the gather-sum-apply iteration has converged or * after maximumNumberOfIterations. */ public Graph runGatherSumApplyIteration( org.apache.flink.graph.gsa.GatherFunction gatherFunction, SumFunction sumFunction, ApplyFunction applyFunction, int maximumNumberOfIterations, GSAConfiguration parameters) { GatherSumApplyIteration iteration = GatherSumApplyIteration.withEdges( edges, gatherFunction, sumFunction, applyFunction, maximumNumberOfIterations); iteration.configure(parameters); DataSet> newVertices = vertices.runOperation(iteration); return new Graph(newVertices, this.edges, this.context); } /** * Runs a {@link VertexCentricIteration} on the graph. * No configuration options are provided. * * @param computeFunction the vertex compute function * @param combiner an optional message combiner * @param maximumNumberOfIterations maximum number of iterations to perform * * @return the updated Graph after the vertex-centric iteration has converged or * after maximumNumberOfIterations. */ public Graph runVertexCentricIteration( ComputeFunction computeFunction, MessageCombiner combiner, int maximumNumberOfIterations) { return this.runVertexCentricIteration(computeFunction, combiner, maximumNumberOfIterations, null); } /** * Runs a {@link VertexCentricIteration} on the graph with configuration options. 
* * @param computeFunction the vertex compute function * @param combiner an optional message combiner * @param maximumNumberOfIterations maximum number of iterations to perform * @param parameters the {@link VertexCentricConfiguration} parameters * * @return the updated Graph after the vertex-centric iteration has converged or * after maximumNumberOfIterations. */ public Graph runVertexCentricIteration( ComputeFunction computeFunction, MessageCombiner combiner, int maximumNumberOfIterations, VertexCentricConfiguration parameters) { VertexCentricIteration iteration = VertexCentricIteration.withEdges( edges, computeFunction, maximumNumberOfIterations); iteration.configure(parameters); DataSet> newVertices = this.getVertices().runOperation(iteration); return new Graph(newVertices, this.edges, this.context); } /** * @param algorithm the algorithm to run on the Graph * @param the return type * @return the result of the graph algorithm * @throws Exception */ public T run(GraphAlgorithm algorithm) throws Exception { return algorithm.run(this); } /** * A {@code GraphAnalytic} is similar to a {@link GraphAlgorithm} but is terminal * and results are retrieved via accumulators. A Flink program has a single * point of execution. A {@code GraphAnalytic} defers execution to the user to * allow composing multiple analytics and algorithms into a single program. * * @param analytic the analytic to run on the Graph * @param the result type * @throws Exception */ public GraphAnalytic run(GraphAnalytic analytic) throws Exception { analytic.run(this); return analytic; } /** * Groups by vertex and computes a GroupReduce transformation over the neighbors (both edges and vertices) * of each vertex. The neighborsFunction applied on the neighbors only has access to both the vertex id * and the vertex value of the grouping vertex. 
* * For each vertex, the neighborsFunction can iterate over all neighbors of this vertex * with the specified direction, and emit any number of output elements, including none. * * @param neighborsFunction the group reduce function to apply to the neighboring edges and vertices * of each vertex. * @param direction the edge direction (in-, out-, all-). * @param the output type * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnNeighbors(NeighborsFunctionWithVertexValue neighborsFunction, EdgeDirection direction) throws IllegalArgumentException { switch (direction) { case IN: // create pairs DataSet, Vertex>> edgesWithSources = edges .join(this.vertices).where(0).equalTo(0); return vertices.coGroup(edgesWithSources) .where(0).equalTo("f0.f1") .with(new ApplyNeighborCoGroupFunction(neighborsFunction)); case OUT: // create pairs DataSet, Vertex>> edgesWithTargets = edges .join(this.vertices).where(1).equalTo(0); return vertices.coGroup(edgesWithTargets) .where(0).equalTo("f0.f0") .with(new ApplyNeighborCoGroupFunction(neighborsFunction)); case ALL: // create pairs DataSet, Vertex>> edgesWithNeighbors = edges .flatMap(new EmitOneEdgeWithNeighborPerNode()) .join(this.vertices).where(1).equalTo(0) .with(new ProjectEdgeWithNeighbor()); return vertices.coGroup(edgesWithNeighbors) .where(0).equalTo(0) .with(new ApplyCoGroupFunctionOnAllNeighbors(neighborsFunction)); default: throw new IllegalArgumentException("Illegal edge direction"); } } /** * Groups by vertex and computes a GroupReduce transformation over the neighbors (both edges and vertices) * of each vertex. The neighborsFunction applied on the neighbors only has access to both the vertex id * and the vertex value of the grouping vertex. * * For each vertex, the neighborsFunction can iterate over all neighbors of this vertex * with the specified direction, and emit any number of output elements, including none. 
* * @param neighborsFunction the group reduce function to apply to the neighboring edges and vertices * of each vertex. * @param direction the edge direction (in-, out-, all-). * @param the output type * @param typeInfo the explicit return type * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnNeighbors(NeighborsFunctionWithVertexValue neighborsFunction, EdgeDirection direction, TypeInformation typeInfo) throws IllegalArgumentException { switch (direction) { case IN: // create pairs DataSet, Vertex>> edgesWithSources = edges .join(this.vertices).where(0).equalTo(0); return vertices.coGroup(edgesWithSources) .where(0).equalTo("f0.f1") .with(new ApplyNeighborCoGroupFunction(neighborsFunction)).returns(typeInfo); case OUT: // create pairs DataSet, Vertex>> edgesWithTargets = edges .join(this.vertices).where(1).equalTo(0); return vertices.coGroup(edgesWithTargets) .where(0).equalTo("f0.f0") .with(new ApplyNeighborCoGroupFunction(neighborsFunction)).returns(typeInfo); case ALL: // create pairs DataSet, Vertex>> edgesWithNeighbors = edges .flatMap(new EmitOneEdgeWithNeighborPerNode()) .join(this.vertices).where(1).equalTo(0) .with(new ProjectEdgeWithNeighbor()); return vertices.coGroup(edgesWithNeighbors) .where(0).equalTo(0) .with(new ApplyCoGroupFunctionOnAllNeighbors(neighborsFunction)).returns(typeInfo); default: throw new IllegalArgumentException("Illegal edge direction"); } } /** * Groups by vertex and computes a GroupReduce transformation over the neighbors (both edges and vertices) * of each vertex. The neighborsFunction applied on the neighbors only has access to the vertex id * (not the vertex value) of the grouping vertex. * * For each vertex, the neighborsFunction can iterate over all neighbors of this vertex * with the specified direction, and emit any number of output elements, including none. 
* * @param neighborsFunction the group reduce function to apply to the neighboring edges and vertices * of each vertex. * @param direction the edge direction (in-, out-, all-). * @param the output type * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnNeighbors(NeighborsFunction neighborsFunction, EdgeDirection direction) throws IllegalArgumentException { switch (direction) { case IN: // create pairs DataSet, Vertex>> edgesWithSources = edges .join(this.vertices).where(0).equalTo(0) .with(new ProjectVertexIdJoin(1)) .withForwardedFieldsFirst("f1->f0"); return edgesWithSources.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction(neighborsFunction)); case OUT: // create pairs DataSet, Vertex>> edgesWithTargets = edges .join(this.vertices).where(1).equalTo(0) .with(new ProjectVertexIdJoin(0)) .withForwardedFieldsFirst("f0"); return edgesWithTargets.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction(neighborsFunction)); case ALL: // create pairs DataSet, Vertex>> edgesWithNeighbors = edges .flatMap(new EmitOneEdgeWithNeighborPerNode()) .join(this.vertices).where(1).equalTo(0) .with(new ProjectEdgeWithNeighbor()); return edgesWithNeighbors.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction(neighborsFunction)); default: throw new IllegalArgumentException("Illegal edge direction"); } } /** * Groups by vertex and computes a GroupReduce transformation over the neighbors (both edges and vertices) * of each vertex. The neighborsFunction applied on the neighbors only has access to the vertex id * (not the vertex value) of the grouping vertex. * * For each vertex, the neighborsFunction can iterate over all neighbors of this vertex * with the specified direction, and emit any number of output elements, including none. * * @param neighborsFunction the group reduce function to apply to the neighboring edges and vertices * of each vertex. 
* @param direction the edge direction (in-, out-, all-). * @param the output type * @param typeInfo the explicit return type * @return a DataSet containing elements of type T * @throws IllegalArgumentException */ public DataSet groupReduceOnNeighbors(NeighborsFunction neighborsFunction, EdgeDirection direction, TypeInformation typeInfo) throws IllegalArgumentException { switch (direction) { case IN: // create pairs DataSet, Vertex>> edgesWithSources = edges .join(this.vertices).where(0).equalTo(0) .with(new ProjectVertexIdJoin(1)) .withForwardedFieldsFirst("f1->f0"); return edgesWithSources.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction(neighborsFunction)).returns(typeInfo); case OUT: // create pairs DataSet, Vertex>> edgesWithTargets = edges .join(this.vertices).where(1).equalTo(0) .with(new ProjectVertexIdJoin(0)) .withForwardedFieldsFirst("f0"); return edgesWithTargets.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction(neighborsFunction)).returns(typeInfo); case ALL: // create pairs DataSet, Vertex>> edgesWithNeighbors = edges .flatMap(new EmitOneEdgeWithNeighborPerNode()) .join(this.vertices).where(1).equalTo(0) .with(new ProjectEdgeWithNeighbor()); return edgesWithNeighbors.groupBy(0).reduceGroup( new ApplyNeighborGroupReduceFunction(neighborsFunction)).returns(typeInfo); default: throw new IllegalArgumentException("Illegal edge direction"); } } private static final class ApplyNeighborGroupReduceFunction implements GroupReduceFunction, Vertex>, T>, ResultTypeQueryable { private NeighborsFunction function; public ApplyNeighborGroupReduceFunction(NeighborsFunction fun) { this.function = fun; } public void reduce(Iterable, Vertex>> edges, Collector out) throws Exception { function.iterateNeighbors(edges, out); } @Override public TypeInformation getProducedType() { return TypeExtractor.createTypeInfo(NeighborsFunction.class, function.getClass(), 3, null, null); } } @ForwardedFieldsSecond("f1") private static final class 
ProjectVertexWithNeighborValueJoin implements FlatJoinFunction, Vertex, Tuple2> { private int fieldPosition; public ProjectVertexWithNeighborValueJoin(int position) { this.fieldPosition = position; } @SuppressWarnings("unchecked") public void join(Edge edge, Vertex otherVertex, Collector> out) { out.collect(new Tuple2((K) edge.getField(fieldPosition), otherVertex.getValue())); } } private static final class ProjectVertexIdJoin implements FlatJoinFunction< Edge, Vertex, Tuple3, Vertex>> { private int fieldPosition; public ProjectVertexIdJoin(int position) { this.fieldPosition = position; } @SuppressWarnings("unchecked") public void join(Edge edge, Vertex otherVertex, Collector, Vertex>> out) { out.collect(new Tuple3, Vertex>((K) edge.getField(fieldPosition), edge, otherVertex)); } } @ForwardedFieldsFirst("f0") @ForwardedFieldsSecond("f1") private static final class ProjectNeighborValue implements FlatJoinFunction< Tuple3>, Vertex, Tuple2> { public void join(Tuple3> keysWithEdge, Vertex neighbor, Collector> out) { out.collect(new Tuple2(keysWithEdge.f0, neighbor.getValue())); } } @ForwardedFieldsFirst("f0; f2->f1") @ForwardedFieldsSecond("*->f2") private static final class ProjectEdgeWithNeighbor implements FlatJoinFunction< Tuple3>, Vertex, Tuple3, Vertex>> { public void join(Tuple3> keysWithEdge, Vertex neighbor, Collector, Vertex>> out) { out.collect(new Tuple3, Vertex>(keysWithEdge.f0, keysWithEdge.f2, neighbor)); } } private static final class ApplyNeighborCoGroupFunction implements CoGroupFunction< Vertex, Tuple2, Vertex>, T>, ResultTypeQueryable { private NeighborsFunctionWithVertexValue function; public ApplyNeighborCoGroupFunction(NeighborsFunctionWithVertexValue fun) { this.function = fun; } public void coGroup(Iterable> vertex, Iterable, Vertex>> neighbors, Collector out) throws Exception { function.iterateNeighbors(vertex.iterator().next(), neighbors, out); } @Override public TypeInformation getProducedType() { return 
TypeExtractor.createTypeInfo(NeighborsFunctionWithVertexValue.class, function.getClass(), 3, null, null); } } private static final class ApplyCoGroupFunctionOnAllNeighbors implements CoGroupFunction, Tuple3, Vertex>, T>, ResultTypeQueryable { private NeighborsFunctionWithVertexValue function; public ApplyCoGroupFunctionOnAllNeighbors(NeighborsFunctionWithVertexValue fun) { this.function = fun; } public void coGroup(Iterable> vertex, final Iterable, Vertex>> keysWithNeighbors, Collector out) throws Exception { final Iterator, Vertex>> neighborsIterator = new Iterator, Vertex>>() { final Iterator, Vertex>> keysWithEdgesIterator = keysWithNeighbors.iterator(); @Override public boolean hasNext() { return keysWithEdgesIterator.hasNext(); } @Override public Tuple2, Vertex> next() { Tuple3, Vertex> next = keysWithEdgesIterator.next(); return new Tuple2, Vertex>(next.f1, next.f2); } @Override public void remove() { keysWithEdgesIterator.remove(); } }; Iterable, Vertex>> neighborsIterable = new Iterable, Vertex>>() { public Iterator, Vertex>> iterator() { return neighborsIterator; } }; Iterator> vertexIterator = vertex.iterator(); if (vertexIterator.hasNext()) { function.iterateNeighbors(vertexIterator.next(), neighborsIterable, out); } else { throw new NoSuchElementException("The edge src/trg id could not be found within the vertexIds"); } } @Override public TypeInformation getProducedType() { return TypeExtractor.createTypeInfo(NeighborsFunctionWithVertexValue.class, function.getClass(), 3, null, null); } } /** * Compute a reduce transformation over the neighbors' vertex values of each vertex. * For each vertex, the transformation consecutively calls a * {@link ReduceNeighborsFunction} until only a single value for each vertex remains. * The {@link ReduceNeighborsFunction} combines a pair of neighbor vertex values * into one new value of the same type. * * @param reduceNeighborsFunction the reduce function to apply to the neighbors of each vertex. 
* @param direction the edge direction (in-, out-, all-) * @return a Dataset of Tuple2, with one tuple per vertex. * The first field of the Tuple2 is the vertex ID and the second field * is the aggregate value computed by the provided {@link ReduceNeighborsFunction}. * @throws IllegalArgumentException */ public DataSet> reduceOnNeighbors(ReduceNeighborsFunction reduceNeighborsFunction, EdgeDirection direction) throws IllegalArgumentException { switch (direction) { case IN: // create pairs final DataSet> verticesWithSourceNeighborValues = edges .join(this.vertices).where(0).equalTo(0) .with(new ProjectVertexWithNeighborValueJoin(1)) .withForwardedFieldsFirst("f1->f0"); return verticesWithSourceNeighborValues.groupBy(0).reduce(new ApplyNeighborReduceFunction( reduceNeighborsFunction)); case OUT: // create pairs DataSet> verticesWithTargetNeighborValues = edges .join(this.vertices).where(1).equalTo(0) .with(new ProjectVertexWithNeighborValueJoin(0)) .withForwardedFieldsFirst("f0"); return verticesWithTargetNeighborValues.groupBy(0).reduce(new ApplyNeighborReduceFunction( reduceNeighborsFunction)); case ALL: // create pairs DataSet> verticesWithNeighborValues = edges .flatMap(new EmitOneEdgeWithNeighborPerNode()) .join(this.vertices).where(1).equalTo(0) .with(new ProjectNeighborValue()); return verticesWithNeighborValues.groupBy(0).reduce(new ApplyNeighborReduceFunction( reduceNeighborsFunction)); default: throw new IllegalArgumentException("Illegal edge direction"); } } @ForwardedFields("f0") private static final class ApplyNeighborReduceFunction implements ReduceFunction> { private ReduceNeighborsFunction function; public ApplyNeighborReduceFunction(ReduceNeighborsFunction fun) { this.function = fun; } @Override public Tuple2 reduce(Tuple2 first, Tuple2 second) throws Exception { first.setField(function.reduceNeighbors(first.f1, second.f1), 1); return first; } } /** * Compute a reduce transformation over the edge values of each vertex. 
* For each vertex, the transformation consecutively calls a * {@link ReduceEdgesFunction} until only a single value for each edge remains. * The {@link ReduceEdgesFunction} combines two edge values into one new value of the same type. * * @param reduceEdgesFunction the reduce function to apply to the neighbors of each vertex. * @param direction the edge direction (in-, out-, all-) * @return a Dataset of Tuple2, with one tuple per vertex. * The first field of the Tuple2 is the vertex ID and the second field * is the aggregate value computed by the provided {@link ReduceEdgesFunction}. * @throws IllegalArgumentException */ public DataSet> reduceOnEdges(ReduceEdgesFunction reduceEdgesFunction, EdgeDirection direction) throws IllegalArgumentException { switch (direction) { case IN: return edges.map(new ProjectVertexWithEdgeValueMap(1)) .withForwardedFields("f1->f0") .groupBy(0).reduce(new ApplyReduceFunction(reduceEdgesFunction)); case OUT: return edges.map(new ProjectVertexWithEdgeValueMap(0)) .withForwardedFields("f0->f0") .groupBy(0).reduce(new ApplyReduceFunction(reduceEdgesFunction)); case ALL: return edges.flatMap(new EmitOneVertexWithEdgeValuePerNode()) .withForwardedFields("f2->f1") .groupBy(0).reduce(new ApplyReduceFunction(reduceEdgesFunction)); default: throw new IllegalArgumentException("Illegal edge direction"); } } @ForwardedFields("f0") private static final class ApplyReduceFunction implements ReduceFunction> { private ReduceEdgesFunction function; public ApplyReduceFunction(ReduceEdgesFunction fun) { this.function = fun; } @Override public Tuple2 reduce(Tuple2 first, Tuple2 second) throws Exception { first.setField(function.reduceEdges(first.f1, second.f1), 1); return first; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy