// dstream.examples.Union
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dstream.examples;
import java.util.concurrent.Future;
import java.util.stream.Stream;
import io.dstream.DStream;
import io.dstream.utils.ExecutionResultUtils;
/**
* Contains various examples of the union operation
*/
public class Union {

    /** Name handed to {@code executeAs(..)} when the example pipeline is launched. */
    static String EXECUTION_NAME = "Union";

    /**
     * Runs every example declared in this class (currently only
     * {@link SimpleTwoWayUnion}).
     *
     * @param args command-line arguments; not used
     * @throws Exception if the example fails
     */
    public static void main(String[] args) throws Exception {
        // run all
        SimpleTwoWayUnion.main();
    }

    /**
     * This example demonstrates a simple union of two streams.
     * To ensure correctness of combining data in the distributed environment, classification must
     * precede any type of stream combination (i.e., join and/or union). This ensures that
     * the two or more streams represented as individual partitions have comparable data.
     *
     * The following case has two data sets:
     *
     * <pre>
     * - one -
     * 1 Oracle
     * 2 Amazon
     * . . .
     *
     * - two -
     * Arun Murthy 3
     * Larry Ellison 1
     * . . .
     * </pre>
     *
     * Classification is performed using the common "id" (the first whitespace-separated
     * token of each line in "one", the third token of each line in "two"), thus ensuring that
     * '1 Oracle' and 'Larry Ellison 1' end up in the same partition.
     *
     * In this example you can also see a nice side-effect of 'classification',
     * since this example uses the 'dstream.parallelism=3' configuration.
     * Since the number of distinct classification values matches the 'parallelism' value (3),
     * the result resembles 'join' behavior: each of the three partitions
     * only contains data relevant to one classification id, giving you the following result:
     *
     * <pre>
     * =&gt; PARTITION:0
     * 3 Hortonworks
     * Rob Bearden 3
     * Herb Cunitz 3
     * Tom McCuch 3
     * Oleg Zhurakousky 3
     * Arun Murthy 3
     *
     * =&gt; PARTITION:1
     * 1 Oracle
     * Larry Ellison 1
     * Thomas Kurian 1
     *
     * =&gt; PARTITION:2
     * 2 Amazon
     * Jeff Bezos 2
     * Jeffrey Blackburn 2
     * </pre>
     */
    public static class SimpleTwoWayUnion {

        /**
         * Builds the two classified streams, unions them, executes the pipeline
         * under {@link Union#EXECUTION_NAME} and prints the resulting partitions.
         *
         * @param args command-line arguments; not used
         * @throws Exception if execution fails or waiting for the result is interrupted
         */
        public static void main(String... args) throws Exception {
            // "one" lines look like "<id> <company>": the id is the first token.
            DStream<String> one = DStream.ofType(String.class, "one")
                    .classify(s -> s.split("\\s+")[0]);
            // "two" lines look like "<first> <last> <id>": the id is the third token.
            DStream<String> two = DStream.ofType(String.class, "two")
                    .classify(s -> s.split("\\s+")[2]);

            Future<Stream<Stream<String>>> resultFuture = one
                    .union(two)
                    .executeAs(EXECUTION_NAME);

            // Future.get() waits for the execution to complete; the outer stream
            // holds one inner stream per result partition.
            Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
            ExecutionResultUtils.printResults(resultPartitionsStream, true);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy