All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.graph.library.LabelPropagation Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.library;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.GraphAlgorithm;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.spargel.MessageIterator;
import org.apache.flink.graph.spargel.MessagingFunction;
import org.apache.flink.graph.spargel.VertexUpdateFunction;
import org.apache.flink.graph.utils.NullValueEdgeMapper;
import org.apache.flink.types.NullValue;

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
 * An implementation of the label propagation algorithm. The iterative algorithm
 * detects communities by propagating labels. In each iteration, a vertex adopts
 * the label that is most frequent among its neighbors' labels.
 *
 * The initial vertex values are used as initial labels and are expected to be
 * {@link Comparable}. In case of a tie (i.e. two or more labels appear with the
 * same frequency), the algorithm picks the greater label. The algorithm converges
 * when no vertex changes its value or the maximum number of iterations has been
 * reached. Note that different initializations might lead to different results.
 *
 * @param <K> vertex identifier type
 * @param <VV> vertex value type which is used for comparison
 * @param <EV> edge value type
 */
@SuppressWarnings("serial")
public class LabelPropagation, EV>
	implements GraphAlgorithm>> {

	private final int maxIterations;

	/**
	 * Creates a new Label Propagation algorithm instance.
	 * The algorithm converges when vertices no longer update their value
	 * or when the maximum number of iterations is reached.
	 * 
	 * @see 
	 * Near linear time algorithm to detect community structures in large-scale networks
	 * 
	 * @param maxIterations The maximum number of iterations to run.
	 */
	public LabelPropagation(int maxIterations) {
		this.maxIterations = maxIterations;
	}

	@Override
	public DataSet> run(Graph input) {

		TypeInformation valueType = ((TupleTypeInfo) input.getVertices().getType()).getTypeAt(1);
		// iteratively adopt the most frequent label among the neighbors of each vertex
		return input
			.mapEdges(new NullValueEdgeMapper())
			.runScatterGatherIteration(
				new UpdateVertexLabel(), new SendNewLabelToNeighbors(valueType), maxIterations)
			.getVertices();
	}

	/**
	 * Function that updates the value of a vertex by adopting the most frequent
	 * label among its in-neighbors
	 */
	public static final class UpdateVertexLabel> extends VertexUpdateFunction {

		public void updateVertex(Vertex vertex, MessageIterator inMessages) {
			Map labelsWithFrequencies = new HashMap();

			long maxFrequency = 1;
			VV mostFrequentLabel = vertex.getValue();

			// store the labels with their frequencies
			for (VV msg : inMessages) {
				if (labelsWithFrequencies.containsKey(msg)) {
					long currentFreq = labelsWithFrequencies.get(msg);
					labelsWithFrequencies.put(msg, currentFreq + 1);
				} else {
					labelsWithFrequencies.put(msg, 1L);
				}
			}
			// select the most frequent label: if two or more labels have the
			// same frequency, the node adopts the label with the highest value
			for (Entry entry : labelsWithFrequencies.entrySet()) {
				if (entry.getValue() == maxFrequency) {
					// check the label value to break ties
					if (entry.getKey().compareTo(mostFrequentLabel) > 0) {
						mostFrequentLabel = entry.getKey();
					}
				} else if (entry.getValue() > maxFrequency) {
					maxFrequency = entry.getValue();
					mostFrequentLabel = entry.getKey();
				}
			}
			setNewVertexValue(mostFrequentLabel);
		}
	}

	/**
	 * Sends the vertex label to all out-neighbors
	 */
	public static final class SendNewLabelToNeighbors>
		extends MessagingFunction
		implements ResultTypeQueryable {

		private final TypeInformation typeInformation;

		public SendNewLabelToNeighbors(TypeInformation typeInformation) {
			this.typeInformation = typeInformation;
		}

		public void sendMessages(Vertex vertex) {
			sendMessageToAllNeighbors(vertex.getValue());
		}

		@Override
		public TypeInformation getProducedType() {
			return typeInformation;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy