// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.List;
import java.util.Arrays;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.graph.gsa.ApplyFunction;
import org.apache.flink.graph.gsa.GSAConfiguration;
import org.apache.flink.graph.gsa.GatherFunction;
import org.apache.flink.graph.gsa.GatherSumApplyIteration;
import org.apache.flink.graph.gsa.SumFunction;
import org.apache.flink.graph.spargel.MessagingFunction;
import org.apache.flink.graph.spargel.VertexCentricConfiguration;
import org.apache.flink.graph.spargel.VertexCentricIteration;
import org.apache.flink.graph.spargel.VertexUpdateFunction;
import org.apache.flink.graph.utils.EdgeToTuple3Map;
import org.apache.flink.graph.utils.Tuple2ToVertexMap;
import org.apache.flink.graph.utils.Tuple3ToEdgeMap;
import org.apache.flink.graph.utils.VertexToTuple2Map;
import org.apache.flink.graph.validation.GraphValidator;
import org.apache.flink.util.Collector;
import org.apache.flink.types.NullValue;
/**
* Represents a Graph consisting of {@link Edge edges} and {@link Vertex
* vertices}.
*
*
* @see org.apache.flink.graph.Edge
* @see org.apache.flink.graph.Vertex
*
* @param <K> the key type for edge and vertex identifiers
* @param <VV> the value type for vertices
* @param <EV> the value type for edges
*/
@SuppressWarnings("serial")
public class Graph {
// Flink execution environment supplied when the graph was created; exposed via getContext().
private final ExecutionEnvironment context;
// Vertex DataSet of this graph; exposed via getVertices().
private final DataSet> vertices;
// Edge DataSet of this graph; exposed via getEdges().
private final DataSet> edges;
/**
* Creates a graph from two DataSets: vertices and edges.
* <p>
* The constructor is private. It is called by the static factory methods
* (such as {@code fromDataSet}) and by the methods of this class that derive
* a new graph (such as {@code mapVertices}). The arguments are stored as
* given; no validation or copying is performed here.
*
* @param vertices a DataSet of vertices.
* @param edges a DataSet of edges.
* @param context the flink execution environment.
*/
private Graph(DataSet> vertices, DataSet> edges, ExecutionEnvironment context) {
this.vertices = vertices;
this.edges = edges;
this.context = context;
}
/**
* Creates a graph from a Collection of vertices and a Collection of edges.
* <p>
* Both collections are converted to DataSets with
* {@code context.fromCollection(...)} and then handed to
* {@code fromDataSet(vertices, edges, context)}.
*
* @param vertices a Collection of vertices.
* @param edges a Collection of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromCollection(Collection> vertices,
Collection> edges, ExecutionEnvironment context) {
return fromDataSet(context.fromCollection(vertices),
context.fromCollection(edges), context);
}
/**
* Creates a graph from a Collection of edges, vertices are induced from the
* edges. Vertices are created automatically and their values are set to
* NullValue.
* <p>
* The collection is converted to a DataSet with
* {@code context.fromCollection(edges)} and then handed to
* {@code fromDataSet(edges, context)}.
*
* @param edges a Collection of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromCollection(Collection> edges,
ExecutionEnvironment context) {
return fromDataSet(context.fromCollection(edges), context);
}
/**
* Creates a graph from a Collection of edges, vertices are induced from the
* edges and vertex values are calculated by a mapper function. Vertices are
* created automatically and their values are set by applying the provided
* map function to the vertex ids.
* <p>
* The collection is converted to a DataSet with
* {@code context.fromCollection(edges)} and then handed to
* {@code fromDataSet(edges, mapper, context)}.
*
* @param edges a Collection of edges.
* @param mapper the mapper function; it is applied to each vertex id.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromCollection(Collection> edges,
final MapFunction mapper,ExecutionEnvironment context) {
return fromDataSet(context.fromCollection(edges), mapper, context);
}
/**
* Creates a graph from a DataSet of vertices and a DataSet of edges.
* <p>
* The DataSets are passed to the private constructor unchanged; this method
* does not check that the edges refer to ids present in the vertex DataSet.
*
* @param vertices a DataSet of vertices.
* @param edges a DataSet of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromDataSet(DataSet> vertices,
DataSet> edges, ExecutionEnvironment context) {
return new Graph(vertices, edges, context);
}
/**
* Creates a graph from a DataSet of edges, vertices are induced from the
* edges. Vertices are created automatically and their values are set to
* NullValue.
*
* @param edges a DataSet of edges.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromDataSet(
DataSet> edges, ExecutionEnvironment context) {
// emit both endpoints of every edge as vertices, then remove the duplicates
DataSet> vertices = edges.flatMap(new EmitSrcAndTarget()).distinct();
return new Graph(vertices, edges, context);
}
/**
* Flat-map function that emits two vertices per edge: one whose id is the
* edge's field f0 and one whose id is the edge's field f1. Both vertices
* carry the NullValue instance as their value.
*/
private static final class EmitSrcAndTarget implements FlatMapFunction<
Edge, Vertex> {
public void flatMap(Edge edge, Collector> out) {
out.collect(new Vertex(edge.f0, NullValue.getInstance()));
out.collect(new Vertex(edge.f1, NullValue.getInstance()));
}
}
/**
* Creates a graph from a DataSet of edges, vertices are induced from the
* edges and vertex values are calculated by a mapper function. Vertices are
* created automatically and their values are set by applying the provided
* map function to the vertex ids.
* <p>
* The result type of the vertex DataSet is declared explicitly via
* {@code returns(...)}, built from the key type of the edges and a value
* type extracted from the mapper's class.
*
* @param edges a DataSet of edges.
* @param mapper the mapper function; it is applied to each distinct vertex id.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromDataSet(DataSet> edges,
final MapFunction mapper, ExecutionEnvironment context) {
// key type: the type of field 0 of the edge tuple type
TypeInformation keyType = ((TupleTypeInfo>) edges.getType()).getTypeAt(0);
// value type: type parameter at index 1 of MapFunction, resolved from the
// mapper's class (presumably the mapper's output type - confirm against TypeExtractor)
TypeInformation valueType = TypeExtractor.createTypeInfo(
MapFunction.class, mapper.getClass(), 1, null, null);
@SuppressWarnings({ "unchecked", "rawtypes" })
TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
Vertex.class, keyType, valueType);
// collect the distinct endpoint ids, then build each vertex from its id and
// the value the mapper returns for that id; the id is forwarded as field f0
DataSet> vertices = edges
.flatMap(new EmitSrcAndTargetAsTuple1()).distinct()
.map(new MapFunction, Vertex>() {
public Vertex map(Tuple1 value) throws Exception {
return new Vertex(value.f0, mapper.map(value.f0));
}
}).returns(returnType).withForwardedFields("f0");
return new Graph(vertices, edges, context);
}
/**
* Flat-map function that emits two Tuple1 records per edge: one wrapping the
* edge's field f0 and one wrapping the edge's field f1.
*/
private static final class EmitSrcAndTargetAsTuple1 implements FlatMapFunction<
Edge, Tuple1> {
public void flatMap(Edge edge, Collector> out) {
out.collect(new Tuple1(edge.f0));
out.collect(new Tuple1(edge.f1));
}
}
/**
* Creates a graph from a DataSet of Tuple objects for vertices and edges.
*
* Vertices with value are created from Tuple2, Edges with value are created
* from Tuple3. The conversion is done by mapping the inputs with
* {@code Tuple2ToVertexMap} and {@code Tuple3ToEdgeMap}; the results are
* handed to {@code fromDataSet(vertices, edges, context)}.
*
* @param vertices a DataSet of Tuple2.
* @param edges a DataSet of Tuple3.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTupleDataSet(DataSet> vertices,
DataSet> edges, ExecutionEnvironment context) {
DataSet> vertexDataSet = vertices.map(new Tuple2ToVertexMap());
DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap());
return fromDataSet(vertexDataSet, edgeDataSet, context);
}
/**
* Creates a graph from a DataSet of Tuple objects for edges, vertices are
* induced from the edges.
*
* Edges with value are created from Tuple3 (by mapping with
* {@code Tuple3ToEdgeMap}). Vertices are created automatically and their
* values are set to NullValue, as done by {@code fromDataSet(edges, context)}.
*
* @param edges a DataSet of Tuple3.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTupleDataSet(DataSet> edges,
ExecutionEnvironment context) {
DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap());
return fromDataSet(edgeDataSet, context);
}
/**
* Creates a graph from a DataSet of Tuple objects for edges, vertices are
* induced from the edges and vertex values are calculated by a mapper
* function. Edges with value are created from Tuple3 (by mapping with
* {@code Tuple3ToEdgeMap}). Vertices are created automatically and their
* values are set by applying the provided map function to the vertex ids,
* as done by {@code fromDataSet(edges, mapper, context)}.
*
* @param edges a DataSet of Tuple3.
* @param mapper the mapper function; it is applied to each vertex id.
* @param context the flink execution environment.
* @return the newly created graph.
*/
public static Graph fromTupleDataSet(DataSet> edges,
final MapFunction mapper, ExecutionEnvironment context) {
DataSet> edgeDataSet = edges.map(new Tuple3ToEdgeMap());
return fromDataSet(edgeDataSet, mapper, context);
}
/**
 * Gives access to the execution environment this graph was created with.
 *
 * @return the flink execution environment.
 */
public ExecutionEnvironment getContext() {
    return context;
}
/**
 * Checks whether this Graph is a valid Graph, as defined by the given
 * {@link GraphValidator}. The decision is delegated entirely to the
 * validator, which receives this graph as its argument.
 *
 * @param validator the validator to run against this graph.
 * @return true if the Graph is valid.
 * @throws Exception declared so that exceptions thrown by the validator can
 *         be passed on to the caller.
 */
public Boolean validate(GraphValidator validator) throws Exception {
    final Boolean verdict = validator.validate(this);
    return verdict;
}
/**
* Returns the vertex DataSet held by this graph (the same object, not a copy).
*
* @return the vertex DataSet.
*/
public DataSet> getVertices() {
return vertices;
}
/**
* Returns the edge DataSet held by this graph (the same object, not a copy).
*
* @return the edge DataSet.
*/
public DataSet> getEdges() {
return edges;
}
/**
* Returns the vertices converted by mapping each of them with
* {@code VertexToTuple2Map}.
*
* @return the vertex DataSet as Tuple2.
*/
public DataSet> getVerticesAsTuple2() {
return vertices.map(new VertexToTuple2Map());
}
/**
* Returns the edges converted by mapping each of them with
* {@code EdgeToTuple3Map}.
*
* @return the edge DataSet as Tuple3.
*/
public DataSet> getEdgesAsTuple3() {
return edges.map(new EdgeToTuple3Map());
}
/**
* This method allows access to the graph's edge values along with its source and target vertex values.
* <p>
* It is computed with two joins: the vertices are first joined with the
* edges on vertex id = edge source to attach the source value, and the
* intermediate result is then joined with the vertices on edge target =
* vertex id to attach the target value. Because both steps are joins, an
* edge whose source or target id has no matching vertex yields no triplet.
*
* @return a triplet DataSet consisting of (srcVertexId, trgVertexId, srcVertexValue, trgVertexValue, edgeValue)
*/
public DataSet> getTriplets() {
return this.getVertices().join(this.getEdges()).where(0).equalTo(0)
.with(new ProjectEdgeWithSrcValue())
.join(this.getVertices()).where(1).equalTo(0)
.with(new ProjectEdgeWithVertexValues());
}
/**
* Join function used by getTriplets(): combines a vertex with an edge into a
* Tuple4 of (edge source, edge target, vertex value, edge value).
* <p>
* The forwarded-field annotations declare that the vertex value (f1 of the
* first input) ends up in f2 of the output, and that the edge's f0 and f1
* keep their positions while its f2 moves to f3.
*/
@ForwardedFieldsFirst("f1->f2")
@ForwardedFieldsSecond("f0; f1; f2->f3")
private static final class ProjectEdgeWithSrcValue implements
FlatJoinFunction, Edge, Tuple4> {
@Override
public void join(Vertex vertex, Edge edge, Collector> collector)
throws Exception {
collector.collect(new Tuple4(edge.getSource(), edge.getTarget(), vertex.getValue(),
edge.getValue()));
}
}
/**
* Join function used by getTriplets(): combines the Tuple4 produced by
* ProjectEdgeWithSrcValue with a vertex into a Triplet of
* (tuple f0, tuple f1, tuple f2, vertex value, tuple f3).
* <p>
* The forwarded-field annotations declare that f0, f1 and f2 of the first
* input keep their positions while its f3 moves to f4, and that the vertex
* value (f1 of the second input) ends up in f3 of the output.
*/
@ForwardedFieldsFirst("f0; f1; f2; f3->f4")
@ForwardedFieldsSecond("f1->f3")
private static final class ProjectEdgeWithVertexValues implements
FlatJoinFunction, Vertex, Triplet> {
@Override
public void join(Tuple4 tripletWithSrcValSet,
Vertex vertex, Collector> collector) throws Exception {
collector.collect(new Triplet(tripletWithSrcValSet.f0, tripletWithSrcValSet.f1,
tripletWithSrcValSet.f2, vertex.getValue(), tripletWithSrcValSet.f3));
}
}
/**
* Apply a function to the attribute of each vertex in the graph.
* <p>
* The mapper receives the whole vertex and its result becomes the new vertex
* value; the vertex id (f0) is kept. The edges and the execution environment
* of this graph are reused for the returned graph.
*
* @param mapper the map function to apply.
* @return a new graph
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public Graph mapVertices(final MapFunction, NV> mapper) {
// key type: the type of field 0 of the vertex tuple type
TypeInformation keyType = ((TupleTypeInfo>) vertices.getType()).getTypeAt(0);
// new value type: type parameter at index 1 of MapFunction, resolved from the mapper's class
TypeInformation valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, null, null);
TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
Vertex.class, keyType, valueType);
// the result type is declared explicitly via returns(...); f0 (the id) is forwarded unchanged
DataSet> mappedVertices = vertices.map(
new MapFunction, Vertex>() {
public Vertex map(Vertex value) throws Exception {
return new Vertex(value.f0, mapper.map(value));
}
})
.returns(returnType)
.withForwardedFields("f0");
return new Graph(mappedVertices, this.edges, this.context);
}
/**
* Apply a function to the attribute of each edge in the graph.
* <p>
* The mapper receives the whole edge and its result becomes the new edge
* value; the edge's f0 and f1 are kept. The vertices and the execution
* environment of this graph are reused for the returned graph.
*
* @param mapper the map function to apply.
* @return a new graph
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public Graph mapEdges(final MapFunction, NV> mapper) {
// key type: the type of field 0 of the edge tuple type; used for both f0 and f1 of the result
TypeInformation keyType = ((TupleTypeInfo>) edges.getType()).getTypeAt(0);
// new value type: type parameter at index 1 of MapFunction, resolved from the mapper's class
TypeInformation valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, null, null);
TypeInformation> returnType = (TypeInformation>) new TupleTypeInfo(
Edge.class, keyType, keyType, valueType);
// the result type is declared explicitly via returns(...); f0 and f1 are forwarded unchanged
DataSet> mappedEdges = edges.map(
new MapFunction, Edge>() {
public Edge map(Edge value) throws Exception {
return new Edge(value.f0, value.f1, mapper
.map(value));
}
})
.returns(returnType)
.withForwardedFields("f0; f1");
return new Graph(this.vertices, mappedEdges, this.context);
}
/**
* Joins the vertex DataSet of this graph with an input DataSet and applies
* a UDF on the resulted values.
* <p>
* The two DataSets are co-grouped on field 0 of each side. For a vertex
* with a matching input entry, the mapper is called with a Tuple2 of
* (current vertex value, input value) and its result becomes the new vertex
* value. A vertex without a matching input entry is kept unchanged, and an
* input entry without a matching vertex is ignored.
*
* @param inputDataSet the DataSet to join with.
* @param mapper the UDF map function to apply.
* @return a new graph where the vertex values have been updated.
*/
public Graph joinWithVertices(DataSet> inputDataSet,
final MapFunction, VV> mapper) {
DataSet> resultedVertices = this.getVertices()
.coGroup(inputDataSet).where(0).equalTo(0)
.with(new ApplyCoGroupToVertexValues(mapper));
return new Graph(resultedVertices, this.edges, this.context);
}
/**
* Co-group function used by joinWithVertices(). For each key group it emits
* at most one vertex, and it reads only the first element of each side of
* the group.
*/
private static final class ApplyCoGroupToVertexValues
implements CoGroupFunction, Tuple2, Vertex> {
// user function that computes the new vertex value from (vertex value, input value)
private MapFunction, VV> mapper;
public ApplyCoGroupToVertexValues(MapFunction, VV> mapper) {
this.mapper = mapper;
}
@Override
public void coGroup(Iterable> vertices,
Iterable> input, Collector> collector) throws Exception {
final Iterator> vertexIterator = vertices.iterator();
final Iterator> inputIterator = input.iterator();
// nothing is emitted when the group contains no vertex
if (vertexIterator.hasNext()) {
if (inputIterator.hasNext()) {
// matching input: the new vertex takes its id from the input entry (f0)
// and its value from the mapper applied to (vertex value, input value)
final Tuple2 inputNext = inputIterator.next();
collector.collect(new Vertex(inputNext.f0, mapper
.map(new Tuple2(vertexIterator.next().f1,
inputNext.f1))));
} else {
// no matching input: the vertex is passed through unchanged
collector.collect(vertexIterator.next());
}
}
}
}
/**
* Joins the edge DataSet with an input DataSet on a composite key of both
* source and target and applies a UDF on the resulted values.
* <p>
* The two DataSets are co-grouped on fields (0, 1) of each side. For a
* matching group the mapper is called with a Tuple2 of (current edge value,
* input value) and its result becomes the new edge value; an edge without a
* matching input entry is kept unchanged. Only the first edge and the first
* input entry of each (source, target) group are used, see
* ApplyCoGroupToEdgeValues.
*
* @param inputDataSet the DataSet to join with.
* @param mapper the UDF map function to apply.
* @param <T> the type of the values in the input DataSet (NOTE(review): the
*            type-parameter name was missing from this tag and is
*            reconstructed - confirm against the method signature)
* @return a new graph where the edge values have been updated.
*/
public Graph joinWithEdges(DataSet> inputDataSet,
final MapFunction, EV> mapper) {
DataSet> resultedEdges = this.getEdges()
.coGroup(inputDataSet).where(0, 1).equalTo(0, 1)
.with(new ApplyCoGroupToEdgeValues(mapper));
return new Graph(this.vertices, resultedEdges, this.context);
}
/**
* Co-group function used by joinWithEdges(). For each key group it emits at
* most one edge, and it reads only the first element of each side of the
* group.
* <p>
* NOTE(review): because only one edge is read per group, additional edges in
* the same group (edges sharing the same source and target under the
* grouping of joinWithEdges) are not emitted. Confirm whether dropping such
* parallel edges is intended.
*/
private static final class ApplyCoGroupToEdgeValues
implements CoGroupFunction, Tuple3, Edge> {
// user function that computes the new edge value from (edge value, input value)
private MapFunction, EV> mapper;
public ApplyCoGroupToEdgeValues(MapFunction, EV> mapper) {
this.mapper = mapper;
}
@Override
public void coGroup(Iterable> edges, Iterable> input,
Collector> collector) throws Exception {
final Iterator> edgesIterator = edges.iterator();
final Iterator> inputIterator = input.iterator();
// nothing is emitted when the group contains no edge
if (edgesIterator.hasNext()) {
if (inputIterator.hasNext()) {
// matching input: the new edge takes f0 and f1 from the input entry and
// its value from the mapper applied to (edge value, input value)
final Tuple3 inputNext = inputIterator.next();
collector.collect(new Edge(inputNext.f0,
inputNext.f1, mapper.map(new Tuple2(
edgesIterator.next().f2, inputNext.f2))));
} else {
// no matching input: the edge is passed through unchanged
collector.collect(edgesIterator.next());
}
}
}
}
/**
* Joins the edge DataSet with an input DataSet on the source key of the
* edges and the first attribute of the input DataSet and applies a UDF on
* the resulted values. In case the inputDataSet contains the same key more
* than once, only the first value will be considered.
* <p>
* The two DataSets are co-grouped on edge field 0 and input field 0. Every
* edge of a matching group gets a new value computed by the mapper from a
* Tuple2 of (current edge value, input value); edges without a matching
* input entry are kept unchanged.
*
* @param inputDataSet the DataSet to join with.
* @param mapper the UDF map function to apply.
* @param <T> the type of the values in the input DataSet (NOTE(review): the
*            type-parameter name was missing from this tag and is
*            reconstructed - confirm against the method signature)
* @return a new graph where the edge values have been updated.
*/
public Graph joinWithEdgesOnSource(DataSet> inputDataSet,
final MapFunction, EV> mapper) {
DataSet> resultedEdges = this.getEdges()
.coGroup(inputDataSet).where(0).equalTo(0)
.with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(mapper));
return new Graph(this.vertices, resultedEdges, this.context);
}
/**
* Co-group function shared by joinWithEdgesOnSource() and
* joinWithEdgesOnTarget(). It emits every edge of the group: with a new
* value when the group has an input entry, unchanged otherwise. Only the
* first input entry of a group is read.
*/
private static final class ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget
implements CoGroupFunction, Tuple2, Edge> {
// user function that computes the new edge value from (edge value, input value)
private MapFunction, EV> mapper;
public ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(
MapFunction, EV> mapper) {
this.mapper = mapper;
}
@Override
public void coGroup(Iterable> edges,
Iterable> input, Collector> collector) throws Exception {
final Iterator> edgesIterator = edges.iterator();
final Iterator> inputIterator = input.iterator();
if (inputIterator.hasNext()) {
// matching input: the first input entry is applied to every edge of the
// group; f0 and f1 of each edge are kept
final Tuple2 inputNext = inputIterator.next();
while (edgesIterator.hasNext()) {
Edge edgesNext = edgesIterator.next();
collector.collect(new Edge(edgesNext.f0,
edgesNext.f1, mapper.map(new Tuple2(
edgesNext.f2, inputNext.f1))));
}
} else {
// no matching input: all edges of the group are passed through unchanged
while (edgesIterator.hasNext()) {
collector.collect(edgesIterator.next());
}
}
}
}
/**
* Joins the edge DataSet with an input DataSet on the target key of the
* edges and the first attribute of the input DataSet and applies a UDF on
* the resulted values. Should the inputDataSet contain the same key more
* than once, only the first value will be considered.
* <p>
* The two DataSets are co-grouped on edge field 1 and input field 0. Every
* edge of a matching group gets a new value computed by the mapper from a
* Tuple2 of (current edge value, input value); edges without a matching
* input entry are kept unchanged.
*
* @param inputDataSet the DataSet to join with.
* @param mapper the UDF map function to apply.
* @param <T> the type of the values in the input DataSet (NOTE(review): the
*            type-parameter name was missing from this tag and is
*            reconstructed - confirm against the method signature)
* @return a new graph where the edge values have been updated.
*/
public Graph joinWithEdgesOnTarget(DataSet> inputDataSet,
final MapFunction, EV> mapper) {
DataSet> resultedEdges = this.getEdges()
.coGroup(inputDataSet).where(1).equalTo(0)
.with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget(mapper));
return new Graph(this.vertices, resultedEdges, this.context);
}
/**
* Apply filtering functions to the graph and return a sub-graph that
* satisfies the predicates for both vertices and edges.
* <p>
* The vertices are filtered first. The edges are then joined with the
* remaining vertices on their source and on their target, so only edges
* whose two endpoints both passed the vertex filter are kept. The edge
* filter is applied to those remaining edges last.
*
* @param vertexFilter the filter function for vertices.
* @param edgeFilter the filter function for edges.
* @return the resulting sub-graph.
*/
public Graph subgraph(FilterFunction> vertexFilter, FilterFunction> edgeFilter) {
DataSet> filteredVertices = this.vertices.filter(vertexFilter);
// keep an edge only if its source (field 0) and its target (field 1) are both among the filtered vertices
DataSet> remainingEdges = this.edges.join(filteredVertices)
.where(0).equalTo(0).with(new ProjectEdge())
.join(filteredVertices).where(1).equalTo(0)
.with(new ProjectEdge());
DataSet> filteredEdges = remainingEdges.filter(edgeFilter);
return new Graph(filteredVertices, filteredEdges,
this.context);
}
/**
* Apply a filtering function to the graph and return a sub-graph that
* satisfies the predicates only for the vertices.
* <p>
* Edges are not filtered by a predicate of their own, but an edge is removed
* when its source or its target did not pass the vertex filter.
*
* @param vertexFilter the filter function for vertices.
* @return the resulting sub-graph.
*/
public Graph filterOnVertices(FilterFunction> vertexFilter) {
DataSet> filteredVertices = this.vertices.filter(vertexFilter);
// keep an edge only if its source (field 0) and its target (field 1) are both among the filtered vertices
DataSet> remainingEdges = this.edges.join(filteredVertices)
.where(0).equalTo(0).with(new ProjectEdge())
.join(filteredVertices).where(1).equalTo(0)
.with(new ProjectEdge());
return new Graph(filteredVertices, remainingEdges, this.context);
}
/**
* Apply a filtering function to the graph and return a sub-graph that
* satisfies the predicates only for the edges.
* <p>
* The vertex DataSet is reused unchanged, so vertices that lose all their
* edges remain in the returned graph.
*
* @param edgeFilter the filter function for edges.
* @return the resulting sub-graph.
*/
public Graph filterOnEdges(FilterFunction> edgeFilter) {
DataSet> filteredEdges = this.edges.filter(edgeFilter);
return new Graph(this.vertices, filteredEdges, this.context);
}
/**
* Join function that emits the edge (first input) unchanged and ignores the
* joined vertex. It is used by subgraph() and filterOnVertices() to keep only
* the edges that have a join partner among the filtered vertices.
*/
@ForwardedFieldsFirst("f0; f1; f2")
private static final class ProjectEdge implements FlatJoinFunction<
Edge, Vertex, Edge> {
public void join(Edge first, Vertex second, Collector> out) {
out.collect(first);
}
}
/**
* Return the out-degree of all vertices in the graph
* <p>
* The vertices are co-grouped with the edges on vertex id = edge field 0
* (the source) and the edges of each group are counted, so a vertex without
* outgoing edges is reported with a count of 0.
*
* @return A DataSet of Tuple2 (vertex id, out-degree)
*/
public DataSet> outDegrees() {
return vertices.coGroup(edges).where(0).equalTo(0).with(new CountNeighborsCoGroup());
}
/**
* Co-group function shared by outDegrees() and inDegrees(). It counts the
* edges of a key group and emits a Tuple2 of (vertex id, count).
* <p>
* It throws a NoSuchElementException when the group contains no vertex.
*/
private static final class CountNeighborsCoGroup
implements CoGroupFunction, Edge, Tuple2> {
// the loop variable is needed only to iterate; its value is never read
@SuppressWarnings("unused")
public void coGroup(Iterable> vertex, Iterable> outEdges,
Collector> out) {
long count = 0;
for (Edge edge : outEdges) {
count++;
}
Iterator> vertexIterator = vertex.iterator();
if(vertexIterator.hasNext()) {
// the id (f0) of the first vertex of the group is reported with the edge count
out.collect(new Tuple2(vertexIterator.next().f0, count));
} else {
// the group has edges but no vertex with that id
throw new NoSuchElementException("The edge src/trg id could not be found within the vertexIds");
}
}
}
/**
* Return the in-degree of all vertices in the graph
* <p>
* The vertices are co-grouped with the edges on vertex id = edge field 1
* (the target) and the edges of each group are counted, so a vertex without
* incoming edges is reported with a count of 0.
*
* @return A DataSet of Tuple2 (vertex id, in-degree)
*/
public DataSet> inDegrees() {
return vertices.coGroup(edges).where(0).equalTo(1).with(new CountNeighborsCoGroup());
}
/**
* Return the degree of all vertices in the graph
*
* @return A DataSet of Tuple2