org.apache.flink.streaming.api.graph.StreamGraph Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.graph;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.operators.ResourceSpec;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.InputTypeConfigurable;
import org.apache.flink.api.java.typeutils.MissingTypeInfo;
import org.apache.flink.optimizer.plan.StreamingPlan;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.state.StateBackend;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.operators.OutputTypeConfigurable;
import org.apache.flink.streaming.api.operators.StoppableStreamSource;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.operators.StreamSource;
import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
import org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner;
import org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner;
import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner;
import org.apache.flink.streaming.runtime.tasks.OneInputStreamTask;
import org.apache.flink.streaming.runtime.tasks.SourceStreamTask;
import org.apache.flink.streaming.runtime.tasks.StoppableSourceStreamTask;
import org.apache.flink.streaming.runtime.tasks.StreamIterationHead;
import org.apache.flink.streaming.runtime.tasks.StreamIterationTail;
import org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Class representing the streaming topology. It contains all the information
 * necessary to build the {@link JobGraph} for the execution.
 */
@Internal
public class StreamGraph extends StreamingPlan {
private static final Logger LOG = LoggerFactory.getLogger(StreamGraph.class);
private String jobName = StreamExecutionEnvironment.DEFAULT_JOB_NAME;
private final StreamExecutionEnvironment environment;
private final ExecutionConfig executionConfig;
private final CheckpointConfig checkpointConfig;
private boolean chaining;
private Map streamNodes;
private Set sources;
private Set sinks;
private Map>> virtualSelectNodes;
private Map> virtualSideOutputNodes;
private Map>> virtualPartitionNodes;
protected Map vertexIDtoBrokerID;
protected Map vertexIDtoLoopTimeout;
private StateBackend stateBackend;
private Set> iterationSourceSinkPairs;
/**
 * Creates an empty stream graph for the given environment.
 *
 * <p>The execution config and the checkpoint config are read from the environment once,
 * here, and kept for the lifetime of the graph.
 *
 * @param environment the environment this graph is built for
 */
public StreamGraph(StreamExecutionEnvironment environment) {
    this.environment = environment;
    this.executionConfig = environment.getConfig();
    this.checkpointConfig = environment.getCheckpointConfig();

    // Allocate all node registries so the graph starts out empty.
    clear();
}
/**
 * Resets the graph to its empty state by replacing every node registry (real nodes,
 * sources, sinks, virtual nodes and iteration bookkeeping) with a fresh, empty collection.
 */
public void clear() {
    // Real nodes and their source/sink classification.
    streamNodes = new HashMap<>();
    sources = new HashSet<>();
    sinks = new HashSet<>();

    // Virtual nodes.
    virtualSelectNodes = new HashMap<>();
    virtualSideOutputNodes = new HashMap<>();
    virtualPartitionNodes = new HashMap<>();

    // Iteration bookkeeping.
    vertexIDtoBrokerID = new HashMap<>();
    vertexIDtoLoopTimeout = new HashMap<>();
    iterationSourceSinkPairs = new HashSet<>();
}
/**
 * Returns the environment this graph was constructed with.
 *
 * @return the environment passed to the constructor
 */
public StreamExecutionEnvironment getEnvironment() {
    return this.environment;
}
/**
 * Returns the execution config that was read from the environment at construction time.
 *
 * @return the execution config of this graph
 */
public ExecutionConfig getExecutionConfig() {
    return this.executionConfig;
}
/**
 * Returns the checkpoint config that was read from the environment at construction time.
 *
 * @return the checkpoint config of this graph
 */
public CheckpointConfig getCheckpointConfig() {
    return this.checkpointConfig;
}
/**
 * Returns the current job name.
 *
 * @return the name set via {@link #setJobName(String)}, or the environment's default job
 *         name if none was set
 */
public String getJobName() {
    return this.jobName;
}
/**
 * Replaces the job name of this graph.
 *
 * @param jobName the new job name (stored as given, without validation)
 */
public void setJobName(String jobName) {
    this.jobName = jobName;
}
/**
 * Sets the chaining flag reported by {@link #isChainingEnabled()}.
 *
 * @param chaining the new value of the chaining flag
 */
public void setChaining(boolean chaining) {
    this.chaining = chaining;
}
/**
 * Stores the state backend for this graph.
 *
 * @param backend the state backend to remember (stored as given, may be null)
 */
public void setStateBackend(StateBackend backend) {
    stateBackend = backend;
}
/**
 * Returns the state backend stored via {@link #setStateBackend(StateBackend)}.
 *
 * @return the stored state backend, or null if none has been set
 */
public StateBackend getStateBackend() {
    return stateBackend;
}
// Chaining and iteration queries
/**
 * Reports the chaining flag.
 *
 * @return the value last passed to {@link #setChaining(boolean)}
 */
public boolean isChainingEnabled() {
    return this.chaining;
}
/**
 * Reports whether this graph is iterative.
 *
 * @return true if at least one loop timeout is registered in {@code vertexIDtoLoopTimeout}
 */
public boolean isIterative() {
    final boolean noLoopTimeouts = vertexIDtoLoopTimeout.isEmpty();
    return !noLoopTimeouts;
}
/**
 * Registers an operator exactly like {@link #addOperator} and additionally records its
 * vertex ID as a source of the graph.
 *
 * @param vertexID ID of the new vertex
 * @param slotSharingGroup slot sharing group of the new vertex
 * @param operatorObject the operator to run in the vertex
 * @param inTypeInfo input type information, forwarded to {@code addOperator}
 * @param outTypeInfo output type information, forwarded to {@code addOperator}
 * @param operatorName name of the operator
 */
public void addSource(
        Integer vertexID,
        String slotSharingGroup,
        StreamOperator operatorObject,
        TypeInformation inTypeInfo,
        TypeInformation outTypeInfo,
        String operatorName) {

    // Register the node first: if that fails, the vertex is not marked as a source.
    addOperator(vertexID, slotSharingGroup, operatorObject, inTypeInfo, outTypeInfo, operatorName);

    sources.add(vertexID);
}
/**
 * Registers an operator exactly like {@link #addOperator} and additionally records its
 * vertex ID as a sink of the graph.
 *
 * @param vertexID ID of the new vertex
 * @param slotSharingGroup slot sharing group of the new vertex
 * @param operatorObject the operator to run in the vertex
 * @param inTypeInfo input type information, forwarded to {@code addOperator}
 * @param outTypeInfo output type information, forwarded to {@code addOperator}
 * @param operatorName name of the operator
 */
public void addSink(
        Integer vertexID,
        String slotSharingGroup,
        StreamOperator operatorObject,
        TypeInformation inTypeInfo,
        TypeInformation outTypeInfo,
        String operatorName) {

    // Register the node first: if that fails, the vertex is not marked as a sink.
    addOperator(vertexID, slotSharingGroup, operatorObject, inTypeInfo, outTypeInfo, operatorName);

    sinks.add(vertexID);
}
/**
 * Adds a node for a single-input operator (or a source) and configures its serializers.
 *
 * <p>The task class of the node depends on the operator: stoppable sources run in a
 * {@code StoppableSourceStreamTask}, other stream sources in a {@code SourceStreamTask},
 * and everything else in a {@code OneInputStreamTask}.
 *
 * @param vertexID ID of the new vertex; must not be registered yet
 * @param slotSharingGroup slot sharing group of the new vertex
 * @param operatorObject the operator to run in the vertex
 * @param inTypeInfo input type information; no input serializer is created when it is null
 *                   or a {@code MissingTypeInfo}
 * @param outTypeInfo output type information; no output serializer is created when it is
 *                    null or a {@code MissingTypeInfo}
 * @param operatorName name of the operator
 */
public void addOperator(
        Integer vertexID,
        String slotSharingGroup,
        StreamOperator operatorObject,
        TypeInformation inTypeInfo,
        TypeInformation outTypeInfo,
        String operatorName) {

    // The stoppable check must come first, before the general StreamSource check.
    if (operatorObject instanceof StoppableStreamSource) {
        addNode(vertexID, slotSharingGroup, StoppableSourceStreamTask.class, operatorObject, operatorName);
    } else if (operatorObject instanceof StreamSource) {
        addNode(vertexID, slotSharingGroup, SourceStreamTask.class, operatorObject, operatorName);
    } else {
        addNode(vertexID, slotSharingGroup, OneInputStreamTask.class, operatorObject, operatorName);
    }

    // Serializers are only created for type information that is actually known.
    TypeSerializer inSerializer = null;
    if (inTypeInfo != null && !(inTypeInfo instanceof MissingTypeInfo)) {
        inSerializer = inTypeInfo.createSerializer(executionConfig);
    }

    TypeSerializer outSerializer = null;
    if (outTypeInfo != null && !(outTypeInfo instanceof MissingTypeInfo)) {
        outSerializer = outTypeInfo.createSerializer(executionConfig);
    }

    // Single-input node: there is no second input serializer.
    setSerializers(vertexID, inSerializer, null, outSerializer);

    // Operators that need their output type at graph-creation time get it now,
    // but only if an output type is available.
    if (outTypeInfo != null && operatorObject instanceof OutputTypeConfigurable) {
        @SuppressWarnings("unchecked")
        OutputTypeConfigurable outputAware = (OutputTypeConfigurable) operatorObject;
        outputAware.setOutputType(outTypeInfo, executionConfig);
    }

    // The input type is handed over as-is (no null check on inTypeInfo).
    if (operatorObject instanceof InputTypeConfigurable) {
        InputTypeConfigurable inputAware = (InputTypeConfigurable) operatorObject;
        inputAware.setInputType(inTypeInfo, executionConfig);
    }

    // Parameterized logging: the message is only formatted when debug is enabled.
    LOG.debug("Vertex: {}", vertexID);
}
/**
 * Adds a node for a two-input operator, running in a {@code TwoInputStreamTask}, and
 * configures its serializers.
 *
 * @param vertexID ID of the new vertex; must not be registered yet
 * @param slotSharingGroup slot sharing group of the new vertex
 * @param taskOperatorObject the two-input operator to run in the vertex
 * @param in1TypeInfo type information of the first input; dereferenced without a null check
 * @param in2TypeInfo type information of the second input; dereferenced without a null check
 * @param outTypeInfo output type information; no output serializer is created when it is
 *                    null or a {@code MissingTypeInfo}
 * @param operatorName name of the operator
 */
public void addCoOperator(
        Integer vertexID,
        String slotSharingGroup,
        TwoInputStreamOperator taskOperatorObject,
        TypeInformation in1TypeInfo,
        TypeInformation in2TypeInfo,
        TypeInformation outTypeInfo,
        String operatorName) {

    addNode(vertexID, slotSharingGroup, TwoInputStreamTask.class, taskOperatorObject, operatorName);

    // The output serializer is optional; both input serializers are always created.
    TypeSerializer outSerializer = null;
    if (outTypeInfo != null && !(outTypeInfo instanceof MissingTypeInfo)) {
        outSerializer = outTypeInfo.createSerializer(executionConfig);
    }
    TypeSerializer firstInputSerializer = in1TypeInfo.createSerializer(executionConfig);
    TypeSerializer secondInputSerializer = in2TypeInfo.createSerializer(executionConfig);

    setSerializers(vertexID, firstInputSerializer, secondInputSerializer, outSerializer);

    // Operators that need their output type at graph-creation time get it now
    // (outTypeInfo is passed on as-is, even when it is null).
    if (taskOperatorObject instanceof OutputTypeConfigurable) {
        @SuppressWarnings("unchecked")
        OutputTypeConfigurable outputAware = (OutputTypeConfigurable) taskOperatorObject;
        outputAware.setOutputType(outTypeInfo, executionConfig);
    }

    // Parameterized logging: the message is only formatted when debug is enabled.
    LOG.debug("CO-TASK: {}", vertexID);
}
protected StreamNode addNode(Integer vertexID,
String slotSharingGroup,
Class vertexClass,
StreamOperator operatorObject,
String operatorName) {
if (streamNodes.containsKey(vertexID)) {
throw new RuntimeException("Duplicate vertexID " + vertexID);
}
StreamNode vertex = new StreamNode(environment,
vertexID,
slotSharingGroup,
operatorObject,
operatorName,
new ArrayList>(),
vertexClass);
streamNodes.put(vertexID, vertex);
return vertex;
}
/**
* Adds a new virtual node that is used to connect a downstream vertex to only the outputs
* with the selected names.
*
* When adding an edge from the virtual node to a downstream node the connection will be made
* to the original node, only with the selected names given here.
*
* @param originalId ID of the node that should be connected to.
* @param virtualId ID of the virtual node.
* @param selectedNames The selected names.
*/
public void addVirtualSelectNode(Integer originalId, Integer virtualId, List selectedNames) {
if (virtualSelectNodes.containsKey(virtualId)) {
throw new IllegalStateException("Already has virtual select node with id " + virtualId);
}
virtualSelectNodes.put(virtualId,
new Tuple2>(originalId, selectedNames));
}
/**
 * Adds a new virtual node that is used to connect a downstream vertex to only the outputs with
 * the selected side-output {@link OutputTag}.
 *
 * @param originalId ID of the node that should be connected to.
 * @param virtualId ID of the virtual node.
 * @param outputTag The selected side-output {@code OutputTag}.
 * @throws IllegalStateException if a virtual side-output node with {@code virtualId} already
 *         exists
 * @throws IllegalArgumentException if the same original node already has a side output with
 *         the same tag ID but different type information
 */
public void addVirtualSideOutputNode(Integer originalId, Integer virtualId, OutputTag outputTag) {

    if (virtualSideOutputNodes.containsKey(virtualId)) {
        throw new IllegalStateException("Already has virtual output node with id " + virtualId);
    }

    // Verify that we don't already have a virtual node for the given originalId/outputTag
    // combination with a different TypeInformation. That would mean someone is trying to
    // read the same side output of one operation with two different types.
    for (Tuple2<Integer, OutputTag> registered : virtualSideOutputNodes.values()) {
        if (!registered.f0.equals(originalId)) {
            // Belongs to a different source operator; no conflict possible.
            continue;
        }

        if (registered.f1.getId().equals(outputTag.getId()) &&
                !registered.f1.getTypeInfo().equals(outputTag.getTypeInfo())) {
            throw new IllegalArgumentException("Trying to add a side output for the same " +
                    "side-output id with a different type. This is not allowed. Side-output ID: " +
                    registered.f1.getId());
        }
    }

    virtualSideOutputNodes.put(virtualId, new Tuple2<>(originalId, outputTag));
}
/**
* Adds a new virtual node that is used to connect a downstream vertex to an input with a
* certain partitioning.
*
* When adding an edge from the virtual node to a downstream node the connection will be made
* to the original node, but with the partitioning given here.
*
* @param originalId ID of the node that should be connected to.
* @param virtualId ID of the virtual node.
* @param partitioner The partitioner
*/
public void addVirtualPartitionNode(Integer originalId, Integer virtualId, StreamPartitioner partitioner) {
if (virtualPartitionNodes.containsKey(virtualId)) {
throw new IllegalStateException("Already has virtual partition node with id " + virtualId);
}
virtualPartitionNodes.put(virtualId,
new Tuple2>(originalId, partitioner));
}
/**
 * Determines the slot sharing group of an operation across virtual nodes.
 *
 * <p>A virtual node (side-output, select or partition - checked in that order) has no slot
 * sharing group of its own; the lookup continues with the node it stands for until a real
 * node is reached.
 *
 * @param id ID of a real or virtual node
 * @return the slot sharing group of the real node that {@code id} resolves to
 */
public String getSlotSharingGroup(Integer id) {
    // Resolve one level of indirection, or answer directly for a real node.
    final Integer representedId;
    if (virtualSideOutputNodes.containsKey(id)) {
        representedId = virtualSideOutputNodes.get(id).f0;
    } else if (virtualSelectNodes.containsKey(id)) {
        representedId = virtualSelectNodes.get(id).f0;
    } else if (virtualPartitionNodes.containsKey(id)) {
        representedId = virtualPartitionNodes.get(id).f0;
    } else {
        return getStreamNode(id).getSlotSharingGroup();
    }

    // The represented node may itself be virtual, so keep resolving.
    return getSlotSharingGroup(representedId);
}
/**
 * Connects two vertices without requesting a specific partitioner, output selection or
 * side-output tag; those are passed to {@code addEdgeInternal} as {@code null}, an empty
 * name list and {@code null} respectively.
 *
 * @param upStreamVertexID ID of the upstream vertex (may be a virtual node ID)
 * @param downStreamVertexID ID of the downstream vertex
 * @param typeNumber input type number, forwarded unchanged to {@code addEdgeInternal}
 */
public void addEdge(Integer upStreamVertexID, Integer downStreamVertexID, int typeNumber) {
    // A fresh, empty list: nothing is selected by name at this point.
    final List<String> noSelectedNames = new ArrayList<String>();

    addEdgeInternal(upStreamVertexID, downStreamVertexID, typeNumber, null, noSelectedNames, null);
}
private void addEdgeInternal(Integer upStreamVertexID,
Integer downStreamVertexID,
int typeNumber,
StreamPartitioner partitioner,
List outputNames,
OutputTag outputTag) {
if (virtualSideOutputNodes.containsKey(upStreamVertexID)) {
int virtualId = upStreamVertexID;
upStreamVertexID = virtualSideOutputNodes.get(virtualId).f0;
if (outputTag == null) {
outputTag = virtualSideOutputNodes.get(virtualId).f1;
}
addEdgeInternal(upStreamVertexID, downStreamVertexID, typeNumber, partitioner, null, outputTag);
} else if (virtualSelectNodes.containsKey(upStreamVertexID)) {
int virtualId = upStreamVertexID;
upStreamVertexID = virtualSelectNodes.get(virtualId).f0;
if (outputNames.isEmpty()) {
// selections that happen downstream override earlier selections
outputNames = virtualSelectNodes.get(virtualId).f1;
}
addEdgeInternal(upStreamVertexID, downStreamVertexID, typeNumber, partitioner, outputNames, outputTag);
} else if (virtualPartitionNodes.containsKey(upStreamVertexID)) {
int virtualId = upStreamVertexID;
upStreamVertexID = virtualPartitionNodes.get(virtualId).f0;
if (partitioner == null) {
partitioner = virtualPartitionNodes.get(virtualId).f1;
}
addEdgeInternal(upStreamVertexID, downStreamVertexID, typeNumber, partitioner, outputNames, outputTag);
} else {
StreamNode upstreamNode = getStreamNode(upStreamVertexID);
StreamNode downstreamNode = getStreamNode(downStreamVertexID);
// If no partitioner was specified and the parallelism of upstream and downstream
// operator matches use forward partitioning, use rebalance otherwise.
if (partitioner == null && upstreamNode.getParallelism() == downstreamNode.getParallelism()) {
partitioner = new ForwardPartitioner