All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.spargel.java.examples.SpargelPageRank Maven / Gradle / Ivy

There is a newer version: 0.9.1-hadoop1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.spargel.java.examples;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.spargel.java.MessageIterator;
import org.apache.flink.spargel.java.MessagingFunction;
import org.apache.flink.spargel.java.OutgoingEdge;
import org.apache.flink.spargel.java.VertexCentricIteration;
import org.apache.flink.spargel.java.VertexUpdateFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

/**
 * An implementation of the basic PageRank algorithm in the vertex-centric API (spargel).
 * In this implementation, the edges carry a weight (the transition probability).
 */
@SuppressWarnings("serial")
public class SpargelPageRank {
	
	private static final double BETA = 0.85;

	
	public static void main(String[] args) throws Exception {
		final int numVertices = 100;
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		// enumerate some sample edges and assign an initial uniform probability (rank)
		DataSet> intialRanks = env.generateSequence(1, numVertices)
								.map(new MapFunction>() {
									public Tuple2 map(Long value) {
										return new Tuple2(value, 1.0/numVertices);
									}
								});
		
		// generate some random edges. the transition probability on each edge is 1/num-out-edges of the source vertex
		DataSet> edgesWithProbability = env.generateSequence(1, numVertices)
								.flatMap(new FlatMapFunction>() {
									public void flatMap(Long value, Collector> out) {
										int numOutEdges = (int) (Math.random() * (numVertices / 2));
										for (int i = 0; i < numOutEdges; i++) {
											long target = (long) (Math.random() * numVertices) + 1;
											out.collect(new Tuple3(value, target, 1.0/numOutEdges));
										}
									}
								});
		
		DataSet> result = intialRanks.runOperation(
			VertexCentricIteration.withValuedEdges(edgesWithProbability,
						new VertexRankUpdater(numVertices, BETA), new RankMessenger(), 20));
		
		result.print();
		env.execute("Spargel PageRank");
	}
	
	/**
	 * Function that updates the rank of a vertex by summing up the partial ranks from all incoming messages
	 * and then applying the dampening formula.
	 */
	public static final class VertexRankUpdater extends VertexUpdateFunction {
		
		private final long numVertices;
		private final double beta;
		
		public VertexRankUpdater(long numVertices, double beta) {
			this.numVertices = numVertices;
			this.beta = beta;
		}

		@Override
		public void updateVertex(Long vertexKey, Double vertexValue, MessageIterator inMessages) {
			double rankSum = 0.0;
			for (double msg : inMessages) {
				rankSum += msg;
			}
			
			// apply the dampening factor / random jump
			double newRank = (beta * rankSum) + (1-BETA)/numVertices;
			setNewVertexValue(newRank);
		}
	}
	
	/**
	 * Distributes the rank of a vertex among all target vertices according to the transition probability,
	 * which is associated with an edge as the edge value.
	 */
	public static final class RankMessenger extends MessagingFunction {
		
		@Override
		public void sendMessages(Long vertexId, Double newRank) {
			for (OutgoingEdge edge : getOutgoingEdges()) {
				sendMessageTo(edge.target(), newRank * edge.edgeValue());
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy