All Downloads are FREE. Search and download functionalities are using the official Maven repository.

dstream.examples.Join Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package dstream.examples;

import static io.dstream.utils.Tuples.Tuple2.tuple2;
import static io.dstream.utils.Tuples.Tuple4.tuple4;

import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.Future;
import java.util.stream.Stream;

import io.dstream.DStream;
import io.dstream.utils.ExecutionResultUtils;
import io.dstream.utils.Tuples.Tuple2;
import io.dstream.utils.Tuples.Tuple4;
/**
 * Contains various examples of join operation
 */
public class Join {

	static String EXECUTION_NAME = "Join";

	public static void main(String[] args) throws Exception {
		//		run all
		TwoWayJoin.main();
		FourWayJoin.main();
	}

	/**
	 * This example demonstrates simple join between two streams.
	 * To ensure correctness of joining data in the distributed environment, classification must
	 * precede any type of streams combine (i.e., join and/or union*). This will ensure
	 * the two+ streams represented as individual partitions have comparable data.
	 *
	 * The following case has two data sets:
	 * 	-one-
	 * 1 Oracle
	 * 2 Amazon
	 * . . .
	 *
	 *  - two-
	 *  Arun Murthy 3
	 *  Larry Ellison 1
	 *  . . .
	 *
	 * Classification is performed using the common "id", this ensuring that
	 * '1 Oracle' and 'Larry Ellison 1' will end up in the same partition.
	 */
	public static class TwoWayJoin {
		public static void main(String... args) throws Exception {
			DStream one = DStream.ofType(String.class, "one").classify(s -> s.split("\\s+")[0]);
			DStream two = DStream.ofType(String.class, "two").classify(s -> s.split("\\s+")[2]);

			Future>>> resultFuture = one
					.join(two).on(t2 -> t2._1().split("\\s+")[0].equals(t2._2().split("\\s+")[2]))
					.executeAs(EXECUTION_NAME);

			Stream>> resultPartitionsStream = resultFuture.get();
			ExecutionResultUtils.printResults(resultPartitionsStream, true);
		}
	}

	/**
	 * This example shows a sample of joining more then two data sets with some transformation
	 * as well as multiple predicates
	 */
	public static class FourWayJoin {
		public static void main(String... args) throws Exception {
			DStream one = DStream.ofType(String.class, "one").classify(a -> a.split("\\s+")[0]);
			DStream two = DStream.ofType(String.class, "two").classify(a -> a.split("\\s+")[2]);
			DStream three = DStream.ofType(String.class, "three").classify(a -> a.split("\\s+")[0]);
			DStream four = DStream.ofType(String.class, "four").classify(a -> a.split("\\s+")[0]);

			Future>>>>> resultFuture = one
					.join(two)//.on(t2 -> t2._1().contains("Hortonworks"))
					.map(t2 -> tuple2(t2._1().toUpperCase(), t2._2().toUpperCase()))
					.join(three)
					.join(four).on(t3 -> {
						String v1 = t3._1()._1().split("\\s+")[0];
						String v2 = t3._1()._2().split("\\s+")[2];
						String v3 = t3._2().split("\\s+")[0];
						String v4 = t3._3().split("\\s+")[0];
						return v1.equals(v2) && v1.equals(v3) && v1.equals(v4);
					})
					.map(t3 -> tuple4(t3._1()._1(), t3._1()._2(), t3._2(), t3._3()))
					.aggregateValues(t4 -> t4._1(), t4 -> t4)
					.executeAs(EXECUTION_NAME);

			Stream>>>> resultPartitionsStream = resultFuture.get();
			ExecutionResultUtils.printResults(resultPartitionsStream, true);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy