All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.graph.library.Summarization Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.library;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.functions.FunctionAnnotation;
import org.apache.flink.api.java.operators.GroupReduceOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.GraphAlgorithm;
import org.apache.flink.graph.Vertex;
import org.apache.flink.util.Collector;

/**
 * The summarization algorithm computes a condensed version of the input graph
* by grouping vertices and edges based on their values. By doing this, the
* algorithm helps to uncover insights about patterns and distributions in the
* graph. *

* In the resulting graph, each vertex represents a group of vertices that share the
* same vertex value. An edge, that connects a vertex with itself, represents all edges
* with the same edge value that connect vertices inside that group. An edge between
* vertices in the output graph represents all edges with the same edge value between
* members of those groups in the input graph. *

* Consider the following example: *

* Input graph: *

* Vertices (id, value):
* (0, "A")
* (1, "A")
* (2, "B")
* (3, "B")
*

* Edges (source, target, value): * (0,1, null)
* (1,0, null)
* (1,2, null)
* (2,1, null)
* (2,3, null)
* (3,2, null)
*

* Output graph: *

* Vertices (id, (value, count)):
* (0, ("A", 2)) // 0 and 1
* (2, ("B", 2)) // 2 and 3
*

* Edges (source, target, (value, count)):
* (0, 0, (null, 2)) // (0,1) and (1,0)
* (2, 2, (null, 2)) // (2,3) and (3,2)
* (0, 2, (null, 1)) // (1,2)
* (2, 0, (null, 1)) // (2,1)
* * Note that this implementation is non-deterministic in the way that it assigns
* identifiers to summarized vertices. However, it is guaranteed that the identifier
* is one of the represented vertex identifiers. * * @param vertex identifier type * @param vertex value type * @param edge value type */ public class Summarization implements GraphAlgorithm, Summarization.EdgeValue>> { @Override public Graph, EdgeValue> run(Graph input) throws Exception { // ------------------------- // build summarized vertices // ------------------------- // group vertices by value UnsortedGrouping> vertexUnsortedGrouping = input.getVertices() .groupBy(1); // reduce vertex group and create vertex group items GroupReduceOperator, VertexGroupItem> vertexGroupItems = vertexUnsortedGrouping .reduceGroup(new VertexGroupReducer()); // create summarized vertices DataSet>> summarizedVertices = vertexGroupItems .filter(new VertexGroupItemToSummarizedVertexFilter()) .map(new VertexGroupItemToSummarizedVertexMapper()); // create mapping between vertices and their representative DataSet> vertexToRepresentativeMap = vertexGroupItems .filter(new VertexGroupItemToRepresentativeFilter()) .map(new VertexGroupItemToVertexWithRepresentativeMapper()); // ------------------------- // build summarized edges // ------------------------- // join edges with vertex representatives and update source and target identifiers DataSet> edgesForGrouping = input.getEdges() .join(vertexToRepresentativeMap) .where(0) // source vertex id .equalTo(0) // vertex id .with(new SourceVertexJoinFunction()) .join(vertexToRepresentativeMap) .where(1) // target vertex id .equalTo(0) // vertex id .with(new TargetVertexJoinFunction()); // create summarized edges DataSet>> summarizedEdges = edgesForGrouping .groupBy(0, 1, 2) // group by source id (0), target id (1) and edge value (2) .reduceGroup(new EdgeGroupReducer()); return Graph.fromDataSet(summarizedVertices, summarizedEdges, input.getContext()); } // -------------------------------------------------------------------------------------------- // Tuple Types // 
-------------------------------------------------------------------------------------------- /** * Value that is stored at a summarized vertex. * * f0: vertex group value * f1: vertex group count * * @param vertex value type */ @SuppressWarnings("serial") public static final class VertexValue extends Tuple2 { public VV getVertexGroupValue() { return f0; } public void setVertexGroupValue(VV vertexGroupValue) { f0 = vertexGroupValue; } public Long getVertexGroupCount() { return f1; } public void setVertexGroupCount(Long vertexGroupCount) { f1 = vertexGroupCount; } } /** * Value that is stored at a summarized edge. * * f0: edge group value * f1: edge group count * * @param edge value type */ @SuppressWarnings("serial") public static final class EdgeValue extends Tuple2 { public EV getEdgeGroupValue() { return f0; } public void setEdgeGroupValue(EV edgeGroupValue) { f0 = edgeGroupValue; } public Long getEdgeGroupCount() { return f1; } public void setEdgeGroupCount(Long edgeGroupCount) { f1 = edgeGroupCount; } } /** * Represents a single vertex in a vertex group. * * f0: vertex identifier * f1: vertex group representative identifier * f2: vertex group value * f3: vertex group count * * @param vertex identifier type * @param vertex group value type */ @SuppressWarnings("serial") public static final class VertexGroupItem extends Tuple4 { public VertexGroupItem() { setVertexGroupCount(0L); } public K getVertexId() { return f0; } public void setVertexId(K vertexId) { f0 = vertexId; } public K getGroupRepresentativeId() { return f1; } public void setGroupRepresentativeId(K groupRepresentativeId) { f1 = groupRepresentativeId; } public VGV getVertexGroupValue() { return f2; } public void setVertexGroupValue(VGV vertexGroupValue) { f2 = vertexGroupValue; } public Long getVertexGroupCount() { return f3; } public void setVertexGroupCount(Long vertexGroupCount) { f3 = vertexGroupCount; } /** * Resets the fields to initial values. 
This is necessary if the tuples are reused and not all fields were modified. */ public void reset() { f0 = null; f1 = null; f2 = null; f3 = 0L; } } /** * Represents a vertex identifier and its corresponding vertex group identifier. * * @param vertex identifier type */ @SuppressWarnings("serial") public static final class VertexWithRepresentative extends Tuple2 { public void setVertexId(K vertexId) { f0 = vertexId; } public K getGroupRepresentativeId() { return f1; } public void setGroupRepresentativeId(K groupRepresentativeId) { f1 = groupRepresentativeId; } } // -------------------------------------------------------------------------------------------- // Functions // -------------------------------------------------------------------------------------------- /** * Creates one {@link VertexGroupItem} for each group element containing the vertex identifier and the identifier * of the group representative which is the first vertex in the reduce input iterable. * * Creates one {@link VertexGroupItem} representing the whole group that contains the vertex identifier of the * group representative, the vertex group value and the total number of group elements. 
* * @param vertex identifier type * @param vertex value type */ @SuppressWarnings("serial") private static final class VertexGroupReducer implements GroupReduceFunction, VertexGroupItem> { private final VertexGroupItem reuseVertexGroupItem; private VertexGroupReducer() { this.reuseVertexGroupItem = new VertexGroupItem<>(); } @Override public void reduce(Iterable> values, Collector> out) throws Exception { K vertexGroupRepresentativeID = null; long vertexGroupCount = 0L; VV vertexGroupValue = null; boolean isFirstElement = true; for (Vertex vertex : values) { if (isFirstElement) { // take final group representative vertex id from first tuple vertexGroupRepresentativeID = vertex.getId(); vertexGroupValue = vertex.getValue(); isFirstElement = false; } // no need to set group value for those tuples reuseVertexGroupItem.setVertexId(vertex.getId()); reuseVertexGroupItem.setGroupRepresentativeId(vertexGroupRepresentativeID); out.collect(reuseVertexGroupItem); vertexGroupCount++; } createGroupRepresentativeTuple(vertexGroupRepresentativeID, vertexGroupValue, vertexGroupCount); out.collect(reuseVertexGroupItem); reuseVertexGroupItem.reset(); } /** * Creates one tuple representing the whole group. This tuple is later used to create a summarized vertex for each * group. * * @param vertexGroupRepresentativeId group representative vertex identifier * @param vertexGroupValue group property value * @param vertexGroupCount total group count */ private void createGroupRepresentativeTuple(K vertexGroupRepresentativeId, VV vertexGroupValue, Long vertexGroupCount) { reuseVertexGroupItem.setVertexId(vertexGroupRepresentativeId); reuseVertexGroupItem.setVertexGroupValue(vertexGroupValue); reuseVertexGroupItem.setVertexGroupCount(vertexGroupCount); } } /** * Creates a summarized edge from a group of edges. Counts the number of elements in the group. 
* * @param vertex identifier type * @param edge group value type */ @SuppressWarnings("serial") private static final class EdgeGroupReducer implements GroupReduceFunction, Edge>> { private final Edge> reuseEdge; private final EdgeValue reuseEdgeValue; private EdgeGroupReducer() { reuseEdge = new Edge<>(); reuseEdgeValue = new EdgeValue<>(); } @Override public void reduce(Iterable> values, Collector>> out) throws Exception { K sourceVertexId = null; K targetVertexId = null; EV edgeGroupValue = null; Long edgeGroupCount = 0L; boolean isFirstElement = true; for (Edge edge : values) { if (isFirstElement) { sourceVertexId = edge.getSource(); targetVertexId = edge.getTarget(); edgeGroupValue = edge.getValue(); isFirstElement = false; } edgeGroupCount++; } reuseEdgeValue.setEdgeGroupValue(edgeGroupValue); reuseEdgeValue.setEdgeGroupCount(edgeGroupCount); reuseEdge.setSource(sourceVertexId); reuseEdge.setTarget(targetVertexId); reuseEdge.setValue(reuseEdgeValue); out.collect(reuseEdge); } } /** * Filter tuples that are representing a vertex group. They are used to create new summarized vertices and have a * group count greater than zero. * * @param vertex identifier type * @param vertex value type */ @SuppressWarnings("serial") @FunctionAnnotation.ForwardedFields("*->*") private static final class VertexGroupItemToSummarizedVertexFilter implements FilterFunction> { @Override public boolean filter(VertexGroupItem vertexGroupItem) throws Exception { return !vertexGroupItem.getVertexGroupCount().equals(0L); } } /** * Filter tuples that are representing a single vertex. They are used to update the source and target vertex * identifiers at the edges. 
* * @param vertex identifier type * @param vertex value type */ @SuppressWarnings("serial") @FunctionAnnotation.ForwardedFields("*->*") private static final class VertexGroupItemToRepresentativeFilter implements FilterFunction> { @Override public boolean filter(VertexGroupItem vertexGroupItem) throws Exception { return vertexGroupItem.getVertexGroupCount().equals(0L); } } /** * Creates a new vertex representing a vertex group. The vertex stores the group value and the number of vertices in * the group. * * @param vertex identifier type * @param vertex value type */ @SuppressWarnings("serial") private static final class VertexGroupItemToSummarizedVertexMapper implements MapFunction, Vertex>> { private final VertexValue reuseSummarizedVertexValue; private VertexGroupItemToSummarizedVertexMapper() { reuseSummarizedVertexValue = new VertexValue<>(); } @Override public Vertex> map(VertexGroupItem value) throws Exception { K vertexId = value.getVertexId(); reuseSummarizedVertexValue.setVertexGroupValue(value.getVertexGroupValue()); reuseSummarizedVertexValue.setVertexGroupCount(value.getVertexGroupCount()); return new Vertex<>(vertexId, reuseSummarizedVertexValue); } } /** * Creates a {@link VertexWithRepresentative} from a {@link VertexGroupItem}. 
* * @param vertex identifier type * @param vertex value type */ @SuppressWarnings("serial") @FunctionAnnotation.ForwardedFields("f0;f1") private static final class VertexGroupItemToVertexWithRepresentativeMapper implements MapFunction, VertexWithRepresentative> { private final VertexWithRepresentative reuseVertexWithRepresentative; private VertexGroupItemToVertexWithRepresentativeMapper() { reuseVertexWithRepresentative = new VertexWithRepresentative<>(); } @Override public VertexWithRepresentative map(VertexGroupItem vertexGroupItem) throws Exception { reuseVertexWithRepresentative.setVertexId(vertexGroupItem.getVertexId()); reuseVertexWithRepresentative.setGroupRepresentativeId(vertexGroupItem.getGroupRepresentativeId()); return reuseVertexWithRepresentative; } } /** * Replaces the source vertex id with the vertex group representative id and adds the edge group value. * * @param vertex identifier type * @param edge value type */ @SuppressWarnings("serial") @FunctionAnnotation.ForwardedFieldsFirst("f1") // edge target id @FunctionAnnotation.ForwardedFieldsSecond("f1->f0") // vertex group id -> edge source id private static final class SourceVertexJoinFunction implements JoinFunction, VertexWithRepresentative, Edge> { private final Edge reuseEdge; private SourceVertexJoinFunction() { this.reuseEdge = new Edge<>(); } @Override public Edge join(Edge edge, VertexWithRepresentative vertex) throws Exception { reuseEdge.setSource(vertex.getGroupRepresentativeId()); reuseEdge.setTarget(edge.getTarget()); reuseEdge.setValue(edge.getValue()); return reuseEdge; } } /** * Replaces the target vertex id with the vertex group identifier. 
* * @param vertex identifier type * @param edge group value type */ @SuppressWarnings("serial") @FunctionAnnotation.ForwardedFieldsFirst("f0;f2") // source vertex id, edge group value @FunctionAnnotation.ForwardedFieldsSecond("f1") // vertex group id -> edge target id private static final class TargetVertexJoinFunction implements JoinFunction, VertexWithRepresentative, Edge> { @Override public Edge join(Edge edge, VertexWithRepresentative vertexRepresentative) throws Exception { edge.setTarget(vertexRepresentative.getGroupRepresentativeId()); return edge; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy