All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.graph.example.LabelPropagation Maven / Gradle / Ivy

There is a newer version: 1.16.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.graph.example;

import org.apache.flink.api.common.ProgramDescription;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.library.LabelPropagationAlgorithm;
import org.apache.flink.graph.utils.Tuple2ToVertexMap;
import org.apache.flink.types.NullValue;
import org.apache.flink.util.Collector;

/**
 * This example uses the label propagation algorithm to detect communities by
 * propagating labels. Initially, each vertex is assigned its id as its label.
 * The vertices iteratively propagate their labels to their neighbors and adopt
 * the most frequent label among their neighbors. The algorithm converges when
 * no vertex changes value or the maximum number of iterations have been
 * reached.
 *
 * The edges input file is expected to contain one edge per line, with long IDs
 * in the following format: {@code <sourceVertexID>\t<targetVertexID>}.
 *
 * The vertices input file is expected to contain one vertex per line, with long IDs
 * and long vertex values, in the following format: {@code <vertexID>\t<vertexValue>}.
 *
 * If no arguments are provided, the example runs with a random graph of 100 vertices.
 */
public class LabelPropagation implements ProgramDescription {

	public static void main(String[] args) throws Exception {

		if(!parseParameters(args)) {
			return;
		}

		// Set up the execution environment
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Set up the graph
		DataSet> vertices = getVertexDataSet(env);
		DataSet> edges = getEdgeDataSet(env);

		Graph graph = Graph.fromDataSet(vertices, edges,	env);

		// Set up the program
		DataSet> verticesWithCommunity = graph.run(
				new LabelPropagationAlgorithm(maxIterations)).getVertices();

		// Emit results
		if(fileOutput) {
			verticesWithCommunity.writeAsCsv(outputPath, "\n", ",");

			// Execute the program
			env.execute("Label Propagation Example");
		} else {
			verticesWithCommunity.print();
		}

	}

	// *************************************************************************
	//     UTIL METHODS
	// *************************************************************************

	private static boolean fileOutput = false;
	private static String vertexInputPath = null;
	private static String edgeInputPath = null;
	private static String outputPath = null;
	private static long numVertices = 100;
	private static int maxIterations = 10;

	private static boolean parseParameters(String[] args) {

		if(args.length > 0) {
			if(args.length != 4) {
				System.err.println("Usage: LabelPropagation    ");
				return false;
			}

			fileOutput = true;
			vertexInputPath = args[0];
			edgeInputPath = args[1];
			outputPath = args[2];
			maxIterations = Integer.parseInt(args[3]);
		} else {
			System.out.println("Executing LabelPropagation example with default parameters and built-in default data.");
			System.out.println("  Provide parameters to read input data from files.");
			System.out.println("  See the documentation for the correct format of input files.");
			System.out.println("  Usage: LabelPropagation    ");
		}
		return true;
	}

	@SuppressWarnings("serial")
	private static DataSet> getVertexDataSet(ExecutionEnvironment env) {

		if (fileOutput) {
			return env.readCsvFile(vertexInputPath)
					.fieldDelimiter("\t")
					.lineDelimiter("\n")
					.types(Long.class, Long.class)
					.map(new Tuple2ToVertexMap());
		}

		return env.generateSequence(1, numVertices).map(
				new MapFunction>() {
					public Vertex map(Long l) throws Exception {
						return new Vertex(l, l);
					}
				});
	}

	@SuppressWarnings("serial")
	private static DataSet> getEdgeDataSet(ExecutionEnvironment env) {

		if (fileOutput) {
			return env.readCsvFile(edgeInputPath)
					.fieldDelimiter("\t")
					.lineDelimiter("\n")
					.types(Long.class, Long.class)
					.map(new MapFunction, Edge>() {
						@Override
						public Edge map(Tuple2 value) throws Exception {
							return new Edge(value.f0, value.f1, NullValue.getInstance());
						}
					});
		}

		return env.generateSequence(1, numVertices).flatMap(
				new FlatMapFunction>() {
					@Override
					public void flatMap(Long key,
							Collector> out) {
						int numOutEdges = (int) (Math.random() * (numVertices / 2));
						for (int i = 0; i < numOutEdges; i++) {
							long target = (long) (Math.random() * numVertices) + 1;
							out.collect(new Edge(key, target,
									NullValue.getInstance()));
						}
					}
				});
	}

	@Override
	public String getDescription() {
		return "Label Propagation Example";
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy