All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.spargel.java.examples.SpargelPageRankCountingVertices Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.spargel.java.examples;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.spargel.java.MessageIterator;
import org.apache.flink.spargel.java.MessagingFunction;
import org.apache.flink.spargel.java.OutgoingEdge;
import org.apache.flink.spargel.java.VertexCentricIteration;
import org.apache.flink.spargel.java.VertexUpdateFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

/**
 * An implementation of the basic PageRank algorithm in the vertex-centric API (spargel).
 * In this implementation, the edges carry a weight (the transition probability).
 */
@SuppressWarnings("serial")
public class SpargelPageRankCountingVertices {
	
	private static final double BETA = 0.85;

	
	public static void main(String[] args) throws Exception {
		final int NUM_VERTICES = 100;
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		// a list of vertices
		DataSet vertices = env.generateSequence(1, NUM_VERTICES);
		
		// generate some random edges. the transition probability on each edge is 1/num-out-edges of the source vertex
		DataSet> edgesWithProbability = env.generateSequence(1, NUM_VERTICES)
								.flatMap(new FlatMapFunction>() {
									public void flatMap(Long value, Collector> out) {
										int numOutEdges = (int) (Math.random() * (NUM_VERTICES / 2));
										for (int i = 0; i < numOutEdges; i++) {
											long target = (long) (Math.random() * NUM_VERTICES) + 1;
											out.collect(new Tuple3(value, target, 1.0/numOutEdges));
										}
									}
								});
		
		// ---------- start of the algorithm ---------------
		
		// count the number of vertices
		DataSet count = vertices
			.map(new MapFunction() {
				public Long map(Long value) {
					return 1L;
				}
			})
			.reduce(new ReduceFunction() {
				public Long reduce(Long value1, Long value2) {
					return value1 + value2;
				}
			});
		
		// enumerate some sample edges and assign an initial uniform probability (rank)
		DataSet> intialRanks = vertices
			.map(new RichMapFunction>() {
				
				private long numVertices;
				
				@Override
				public void open(Configuration parameters) {
					numVertices = getRuntimeContext().getBroadcastVariable("count").iterator().next();
				}
				
				public Tuple2 map(Long value) {
					return new Tuple2(value, 1.0/numVertices);
				}
			}).withBroadcastSet(count, "count");
		

		VertexCentricIteration iteration = VertexCentricIteration.withValuedEdges(edgesWithProbability,
				new VertexRankUpdater(BETA), new RankMessenger(), 20);
		iteration.addBroadcastSetForUpdateFunction("count", count);
		
		
		DataSet> result = intialRanks.runOperation(iteration);
		
		result.print();
		env.execute("Spargel PageRank");
	}
	
	/**
	 * Function that updates the rank of a vertex by summing up the partial ranks from all incoming messages
	 * and then applying the dampening formula.
	 */
	public static final class VertexRankUpdater extends VertexUpdateFunction {
		
		private final double beta;
		private long numVertices;
		
		public VertexRankUpdater(double beta) {
			this.beta = beta;
		}
		
		@Override
		public void preSuperstep() {
			numVertices = this.getBroadcastSet("count").iterator().next();
		}

		@Override
		public void updateVertex(Long vertexKey, Double vertexValue, MessageIterator inMessages) {
			double rankSum = 0.0;
			for (double msg : inMessages) {
				rankSum += msg;
			}
			
			// apply the dampening factor / random jump
			double newRank = (beta * rankSum) + (1-BETA)/numVertices;
			setNewVertexValue(newRank);
		}
	}
	
	/**
	 * Distributes the rank of a vertex among all target vertices according to the transition probability,
	 * which is associated with an edge as the edge value.
	 */
	public static final class RankMessenger extends MessagingFunction {
		
		@Override
		public void sendMessages(Long vertexId, Double newRank) {
			for (OutgoingEdge edge : getOutgoingEdges()) {
				sendMessageTo(edge.target(), newRank * edge.edgeValue());
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy