org.apache.flink.streaming.examples.join.WindowJoin Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.examples.join;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.examples.join.WindowJoinSampleData.GradeSource;
import org.apache.flink.streaming.examples.join.WindowJoinSampleData.SalarySource;
/**
* Example illustrating a windowed stream join between two data streams.
*
* The example works on two input streams with pairs (name, grade) and (name, salary)
* respectively. It joins the steams based on "name" within a configurable window.
*
*
The example uses a built-in sample data generator that generates
* the steams of pairs at a configurable rate.
*/
@SuppressWarnings("serial")
public class WindowJoin {
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
// parse the parameters
final ParameterTool params = ParameterTool.fromArgs(args);
final long windowSize = params.getLong("windowSize", 2000);
final long rate = params.getLong("rate", 3L);
System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
System.out.println("To customize example, use: WindowJoin [--windowSize ] [--rate ]");
// obtain execution environment, run this example in "ingestion time"
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
// make parameters available in the web interface
env.getConfig().setGlobalJobParameters(params);
// create the data sources for both grades and salaries
DataStream> grades = GradeSource.getSource(env, rate);
DataStream> salaries = SalarySource.getSource(env, rate);
// run the actual window join program
// for testability, this functionality is in a separate method.
DataStream> joinedStream = runWindowJoin(grades, salaries, windowSize);
// print the results with a single thread, rather than in parallel
joinedStream.print().setParallelism(1);
// execute program
env.execute("Windowed Join Example");
}
public static DataStream> runWindowJoin(
DataStream> grades,
DataStream> salaries,
long windowSize) {
return grades.join(salaries)
.where(new NameKeySelector())
.equalTo(new NameKeySelector())
.window(TumblingEventTimeWindows.of(Time.milliseconds(windowSize)))
.apply(new JoinFunction, Tuple2, Tuple3>() {
@Override
public Tuple3 join(
Tuple2 first,
Tuple2 second) {
return new Tuple3(first.f0, first.f1, second.f1);
}
});
}
private static class NameKeySelector implements KeySelector, String> {
@Override
public String getKey(Tuple2 value) {
return value.f0;
}
}
}