All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.graph.library.PageRank Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.library;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.EdgeJoinFunction;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.GraphAlgorithm;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.spargel.GatherFunction;
import org.apache.flink.graph.spargel.MessageIterator;
import org.apache.flink.graph.spargel.ScatterFunction;
import org.apache.flink.graph.spargel.ScatterGatherConfiguration;
import org.apache.flink.types.LongValue;

/**
 * This is an implementation of a simple PageRank algorithm, using a scatter-gather iteration.
 * The user can define the damping factor and the maximum number of iterations.
 *
 * The implementation assumes that each page has at least one incoming and one outgoing link.
 */
public class PageRank implements GraphAlgorithm>> {

	private double beta;
	private int maxIterations;

	/**
	 * Creates an instance of the PageRank algorithm.
	 *
	 * The implementation assumes that each page has at least one incoming and one outgoing link.
	 * 
	 * @param beta the damping factor
	 * @param maxIterations the maximum number of iterations
	 */
	public PageRank(double beta, int maxIterations) {
		this.beta = beta;
		this.maxIterations = maxIterations;
	}

	@Override
	public DataSet> run(Graph network) throws Exception {
		DataSet> vertexOutDegrees = network.outDegrees();

		Graph networkWithWeights = network
				.joinWithEdgesOnSource(vertexOutDegrees, new InitWeights());

		ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
		parameters.setOptNumVertices(true);

		return networkWithWeights.runScatterGatherIteration(new RankMessenger(),
				new VertexRankUpdater(beta), maxIterations, parameters)
				.getVertices();
	}

	/**
	 * Distributes the rank of a vertex among all target vertices according to
	 * the transition probability, which is associated with an edge as the edge
	 * value.
	 */
	@SuppressWarnings("serial")
	public static final class RankMessenger extends ScatterFunction {
		@Override
		public void sendMessages(Vertex vertex) {
			if (getSuperstepNumber() == 1) {
				// initialize vertex ranks
				vertex.setValue(1.0 / this.getNumberOfVertices());
			}

			for (Edge edge : getEdges()) {
				sendMessageTo(edge.getTarget(), vertex.getValue() * edge.getValue());
			}
		}
	}

	/**
	 * Function that updates the rank of a vertex by summing up the partial
	 * ranks from all incoming messages and then applying the dampening formula.
	 */
	@SuppressWarnings("serial")
	public static final class VertexRankUpdater extends GatherFunction {
		private final double beta;

		public VertexRankUpdater(double beta) {
			this.beta = beta;
		}

		@Override
		public void updateVertex(Vertex vertex, MessageIterator inMessages) {
			double rankSum = 0.0;
			for (double msg : inMessages) {
				rankSum += msg;
			}

			// apply the dampening factor / random jump
			double newRank = (beta * rankSum) + (1 - beta) / this.getNumberOfVertices();
			setNewVertexValue(newRank);
		}
	}

	@SuppressWarnings("serial")
	private static final class InitWeights implements EdgeJoinFunction {
		public Double edgeJoin(Double edgeValue, LongValue inputValue) {
			return edgeValue / (double) inputValue.getValue();
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy