/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.runtime.tasks;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.metrics.SimpleCounter;
import org.apache.flink.runtime.checkpoint.CheckpointOptions;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.io.network.api.CancelCheckpointMarker;
import org.apache.flink.runtime.io.network.api.CheckpointBarrier;
import org.apache.flink.runtime.metrics.MetricNames;
import org.apache.flink.runtime.metrics.groups.OperatorIOMetricGroup;
import org.apache.flink.runtime.metrics.groups.OperatorMetricGroup;
import org.apache.flink.runtime.plugable.SerializationDelegate;
import org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput;
import org.apache.flink.streaming.api.collector.selector.DirectedOutput;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.graph.StreamConfig;
import org.apache.flink.streaming.api.graph.StreamEdge;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.operators.Output;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.io.RecordWriterOutput;
import org.apache.flink.streaming.runtime.io.StreamRecordWriter;
import org.apache.flink.streaming.runtime.metrics.WatermarkGauge;
import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.runtime.streamstatus.StreamStatus;
import org.apache.flink.streaming.runtime.streamstatus.StreamStatusMaintainer;
import org.apache.flink.streaming.runtime.streamstatus.StreamStatusProvider;
import org.apache.flink.util.OutputTag;
import org.apache.flink.util.XORShiftRandom;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

/**
 * The {@code OperatorChain} contains all operators that are executed as one chain within a single
 * {@link StreamTask}.
 *
 * @param <OUT> The type of elements emitted by the chain, i.e., the output type of the chain's
 *              head operator.
 */
@Internal
public class OperatorChain<OUT, OP extends StreamOperator<OUT>> implements StreamStatusMaintainer {

	private static final Logger LOG = LoggerFactory.getLogger(OperatorChain.class);

	private final StreamOperator<?>[] allOperators;

	private final RecordWriterOutput<?>[] streamOutputs;

	private final WatermarkGaugeExposingOutput<StreamRecord<OUT>> chainEntryPoint;

	private final OP headOperator;

	/**
	 * Current status of the input stream of the operator chain.
	 * Watermarks explicitly generated by operators in the chain (i.e., timestamp
	 * assigners / watermark extractors) will be blocked and not forwarded if
	 * this value is {@link StreamStatus#IDLE}.
	 */
	private StreamStatus streamStatus = StreamStatus.ACTIVE;

	public OperatorChain(
			StreamTask<OUT, OP> containingTask,
			List<StreamRecordWriter<SerializationDelegate<StreamRecord<OUT>>>> streamRecordWriters) {

		final ClassLoader userCodeClassloader = containingTask.getUserCodeClassLoader();
		final StreamConfig configuration = containingTask.getConfiguration();

		headOperator = configuration.getStreamOperator(userCodeClassloader);

		// we read the chained configs, and the order of record writer registrations by output name
		Map<Integer, StreamConfig> chainedConfigs = configuration.getTransitiveChainedTaskConfigsWithSelf(userCodeClassloader);

		// create the final output stream writers
		// we iterate through all the out edges from this job vertex and create a stream output
		List<StreamEdge> outEdgesInOrder = configuration.getOutEdgesInOrder(userCodeClassloader);
		Map<StreamEdge, RecordWriterOutput<?>> streamOutputMap = new HashMap<>(outEdgesInOrder.size());
		this.streamOutputs = new RecordWriterOutput<?>[outEdgesInOrder.size()];

		// from here on, we need to make sure that the output writers are shut down again on failure
		boolean success = false;
		try {
			for (int i = 0; i < outEdgesInOrder.size(); i++) {
				StreamEdge outEdge = outEdgesInOrder.get(i);

				RecordWriterOutput<?> streamOutput = createStreamOutput(
					streamRecordWriters.get(i),
					outEdge,
					chainedConfigs.get(outEdge.getSourceId()),
					containingTask.getEnvironment());

				this.streamOutputs[i] = streamOutput;
				streamOutputMap.put(outEdge, streamOutput);
			}

			// we create the chain of operators and grab the collector that leads into the chain
			List<StreamOperator<?>> allOps = new ArrayList<>(chainedConfigs.size());
			this.chainEntryPoint = createOutputCollector(
				containingTask,
				configuration,
				chainedConfigs,
				userCodeClassloader,
				streamOutputMap,
				allOps);

			if (headOperator != null) {
				WatermarkGaugeExposingOutput<StreamRecord<OUT>> output = getChainEntryPoint();
				headOperator.setup(containingTask, configuration, output);

				headOperator.getMetricGroup().gauge(MetricNames.IO_CURRENT_OUTPUT_WATERMARK, output.getWatermarkGauge());
			}

			// add head operator to end of chain
			allOps.add(headOperator);

			this.allOperators = allOps.toArray(new StreamOperator<?>[allOps.size()]);

			success = true;
		}
		finally {
			// make sure we clean up after ourselves in case of a failure after acquiring
			// the first resources
			if (!success) {
				for (RecordWriterOutput<?> output : this.streamOutputs) {
					if (output != null) {
						output.close();
					}
				}
			}
		}

	}
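
	// Hedged usage sketch (not part of this class): roughly how a containing StreamTask is
	// expected to construct and query the chain. The local variable names below (e.g.
	// `recordWriters`) are hypothetical; the real call sites live in StreamTask.
	//
	//   List<StreamRecordWriter<SerializationDelegate<StreamRecord<OUT>>>> recordWriters = ...;
	//   OperatorChain<OUT, OP> operatorChain = new OperatorChain<>(this, recordWriters);
	//   OP headOperator = operatorChain.getHeadOperator();   // already wired to the chain entry point
	//   int numChainedOperators = operatorChain.getChainLength();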

	@Override
	public StreamStatus getStreamStatus() {
		return streamStatus;
	}

	@Override
	public void toggleStreamStatus(StreamStatus status) {
		if (!status.equals(this.streamStatus)) {
			this.streamStatus = status;

			// try and forward the stream status change to all outgoing connections
			for (RecordWriterOutput<?> streamOutput : streamOutputs) {
				streamOutput.emitStreamStatus(status);
			}
		}
	}
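
	// Hedged illustration (assumption, not from this file): how the status toggle interacts with
	// watermark forwarding. While the chain is IDLE, the ChainingOutput and
	// BroadcastingOutputCollector implementations below update their watermark gauges but do not
	// forward the watermark to the chained operator.
	//
	//   operatorChain.toggleStreamStatus(StreamStatus.IDLE);
	//   output.emitWatermark(new Watermark(42L));    // gauge updated, processWatermark(...) not called
	//   operatorChain.toggleStreamStatus(StreamStatus.ACTIVE);
	//   output.emitWatermark(new Watermark(43L));    // forwarded to the chained operator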

	public void broadcastCheckpointBarrier(long id, long timestamp, CheckpointOptions checkpointOptions) throws IOException {
		try {
			CheckpointBarrier barrier = new CheckpointBarrier(id, timestamp, checkpointOptions);
			for (RecordWriterOutput<?> streamOutput : streamOutputs) {
				streamOutput.broadcastEvent(barrier);
			}
		}
		catch (InterruptedException e) {
			throw new IOException("Interrupted while broadcasting checkpoint barrier");
		}
	}
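
	// Hedged caller sketch (assumption about StreamTask, not shown in this file): a checkpoint
	// trigger on the task thread would typically broadcast the barrier to all chain outputs
	// before snapshotting operator state, along the lines of:
	//
	//   operatorChain.broadcastCheckpointBarrier(
	//       checkpointMetaData.getCheckpointId(),
	//       checkpointMetaData.getTimestamp(),
	//       checkpointOptions);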

	public void broadcastCheckpointCancelMarker(long id) throws IOException {
		try {
			CancelCheckpointMarker barrier = new CancelCheckpointMarker(id);
			for (RecordWriterOutput<?> streamOutput : streamOutputs) {
				streamOutput.broadcastEvent(barrier);
			}
		}
		catch (InterruptedException e) {
			throw new IOException("Interrupted while broadcasting checkpoint cancellation");
		}
	}

	public RecordWriterOutput<?>[] getStreamOutputs() {
		return streamOutputs;
	}

	public StreamOperator<?>[] getAllOperators() {
		return allOperators;
	}

	public WatermarkGaugeExposingOutput<StreamRecord<OUT>> getChainEntryPoint() {
		return chainEntryPoint;
	}

	/**
	 * This method should be called before finishing record emission, to make sure any data
	 * that is still buffered will be sent. It also ensures that all exceptions related to
	 * sending data are recognized.
	 *
	 * @throws IOException Thrown, if the buffered data cannot be pushed into the output streams.
	 */
	public void flushOutputs() throws IOException {
		for (RecordWriterOutput<?> streamOutput : getStreamOutputs()) {
			streamOutput.flush();
		}
	}
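
	// Hedged teardown sketch (assumption, not from this file): callers are expected to flush
	// before releasing, so that buffered data is sent and send-side exceptions surface:
	//
	//   try {
	//       operatorChain.flushOutputs();
	//   } finally {
	//       operatorChain.releaseOutputs();   // best-effort cleanup, see below
	//   }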

	/**
	 * This method releases all resources of the record writer output. It stops the output
	 * flushing thread (if there is one) and releases all buffers currently held by the output
	 * serializers.
	 *
	 * <p>This method should never fail.
	 */
	public void releaseOutputs() {
		for (RecordWriterOutput<?> streamOutput : streamOutputs) {
			streamOutput.close();
		}
	}

	public OP getHeadOperator() {
		return headOperator;
	}

	public int getChainLength() {
		return allOperators == null ? 0 : allOperators.length;
	}

	// ------------------------------------------------------------------------
	//  initialization utilities
	// ------------------------------------------------------------------------

	private <T> WatermarkGaugeExposingOutput<StreamRecord<T>> createOutputCollector(
			StreamTask<?, ?> containingTask,
			StreamConfig operatorConfig,
			Map<Integer, StreamConfig> chainedConfigs,
			ClassLoader userCodeClassloader,
			Map<StreamEdge, RecordWriterOutput<?>> streamOutputs,
			List<StreamOperator<?>> allOperators) {
		List<Tuple2<WatermarkGaugeExposingOutput<StreamRecord<T>>, StreamEdge>> allOutputs = new ArrayList<>(4);

		// create collectors for the network outputs
		for (StreamEdge outputEdge : operatorConfig.getNonChainedOutputs(userCodeClassloader)) {
			@SuppressWarnings("unchecked")
			RecordWriterOutput<T> output = (RecordWriterOutput<T>) streamOutputs.get(outputEdge);

			allOutputs.add(new Tuple2<>(output, outputEdge));
		}

		// Create collectors for the chained outputs
		for (StreamEdge outputEdge : operatorConfig.getChainedOutputs(userCodeClassloader)) {
			int outputId = outputEdge.getTargetId();
			StreamConfig chainedOpConfig = chainedConfigs.get(outputId);

			WatermarkGaugeExposingOutput<StreamRecord<T>> output = createChainedOperator(
				containingTask,
				chainedOpConfig,
				chainedConfigs,
				userCodeClassloader,
				streamOutputs,
				allOperators,
				outputEdge.getOutputTag());
			allOutputs.add(new Tuple2<>(output, outputEdge));
		}

		// if there are multiple outputs, or the outputs are directed, we need to
		// wrap them as one output

		List<OutputSelector<T>> selectors = operatorConfig.getOutputSelectors(userCodeClassloader);

		if (selectors == null || selectors.isEmpty()) {
			// simple path, no selector necessary
			if (allOutputs.size() == 1) {
				return allOutputs.get(0).f0;
			}
			else {
				// send to N outputs. Note that this includes the special case
				// of sending to zero outputs
				@SuppressWarnings({"unchecked", "rawtypes"})
				Output<StreamRecord<T>>[] asArray = new Output[allOutputs.size()];
				for (int i = 0; i < allOutputs.size(); i++) {
					asArray[i] = allOutputs.get(i).f0;
				}

				// This is the inverse of creating the normal ChainingOutput.
				// If the chaining output does not copy we need to copy in the broadcast output,
				// otherwise multi-chaining would not work correctly.
				if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
					return new CopyingBroadcastingOutputCollector<>(asArray, this);
				} else {
					return new BroadcastingOutputCollector<>(asArray, this);
				}
			}
		}
		else {
			// selector present, more complex routing necessary

			// This is the inverse of creating the normal ChainingOutput.
			// If the chaining output does not copy we need to copy in the broadcast output,
			// otherwise multi-chaining would not work correctly.
			if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
				return new CopyingDirectedOutput<>(selectors, allOutputs);
			} else {
				return new DirectedOutput<>(selectors, allOutputs);
			}
		}
	}

	private <IN, OUT> WatermarkGaugeExposingOutput<StreamRecord<IN>> createChainedOperator(
			StreamTask<?, ?> containingTask,
			StreamConfig operatorConfig,
			Map<Integer, StreamConfig> chainedConfigs,
			ClassLoader userCodeClassloader,
			Map<StreamEdge, RecordWriterOutput<?>> streamOutputs,
			List<StreamOperator<?>> allOperators,
			OutputTag<IN> outputTag) {
		// create the output that the operator writes to first. this may recursively create more operators
		WatermarkGaugeExposingOutput<StreamRecord<OUT>> chainedOperatorOutput = createOutputCollector(
			containingTask,
			operatorConfig,
			chainedConfigs,
			userCodeClassloader,
			streamOutputs,
			allOperators);

		// now create the operator and give it the output collector to write its output to
		OneInputStreamOperator<IN, OUT> chainedOperator = operatorConfig.getStreamOperator(userCodeClassloader);

		chainedOperator.setup(containingTask, operatorConfig, chainedOperatorOutput);

		allOperators.add(chainedOperator);

		WatermarkGaugeExposingOutput<StreamRecord<IN>> currentOperatorOutput;
		if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
			currentOperatorOutput = new ChainingOutput<>(chainedOperator, this, outputTag);
		}
		else {
			TypeSerializer<IN> inSerializer = operatorConfig.getTypeSerializerIn1(userCodeClassloader);
			currentOperatorOutput = new CopyingChainingOutput<>(chainedOperator, inSerializer, outputTag, this);
		}

		chainedOperator.getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, currentOperatorOutput.getWatermarkGauge());
		chainedOperator.getMetricGroup().gauge(MetricNames.IO_CURRENT_OUTPUT_WATERMARK, chainedOperatorOutput.getWatermarkGauge());

		return currentOperatorOutput;
	}

	private RecordWriterOutput<OUT> createStreamOutput(
			StreamRecordWriter<SerializationDelegate<StreamRecord<OUT>>> streamRecordWriter,
			StreamEdge edge,
			StreamConfig upStreamConfig,
			Environment taskEnvironment) {
		OutputTag sideOutputTag = edge.getOutputTag(); // OutputTag, return null if not sideOutput

		TypeSerializer outSerializer = null;

		if (edge.getOutputTag() != null) {
			// side output
			outSerializer = upStreamConfig.getTypeSerializerSideOut(
					edge.getOutputTag(), taskEnvironment.getUserClassLoader());
		} else {
			// main output
			outSerializer = upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
		}

		return new RecordWriterOutput<>(streamRecordWriter, outSerializer, sideOutputTag, this);
	}

	// ------------------------------------------------------------------------
	//  Collectors for output chaining
	// ------------------------------------------------------------------------

	/**
	 * An {@link Output} that measures the last emitted watermark with a {@link WatermarkGauge}.
	 *
	 * @param <T> The type of the elements that can be emitted.
	 */
	public interface WatermarkGaugeExposingOutput<T> extends Output<T> {
		Gauge<Long> getWatermarkGauge();
	}

	private static class ChainingOutput<T> implements WatermarkGaugeExposingOutput<StreamRecord<T>> {

		protected final OneInputStreamOperator<T, ?> operator;
		protected final Counter numRecordsIn;
		protected final WatermarkGauge watermarkGauge = new WatermarkGauge();

		protected final StreamStatusProvider streamStatusProvider;

		protected final OutputTag<T> outputTag;

		public ChainingOutput(
				OneInputStreamOperator<T, ?> operator,
				StreamStatusProvider streamStatusProvider,
				OutputTag<T> outputTag) {
			this.operator = operator;

			{
				Counter tmpNumRecordsIn;
				try {
					OperatorIOMetricGroup ioMetricGroup = ((OperatorMetricGroup) operator.getMetricGroup()).getIOMetricGroup();
					ioMetricGroup.reuseInputMetricsForTask();
					ioMetricGroup.reuseOutputMetricsForTask();
					tmpNumRecordsIn = ioMetricGroup.getNumRecordsInCounter();
				} catch (Exception e) {
					LOG.warn("An exception occurred during the metrics setup.", e);
					tmpNumRecordsIn = new SimpleCounter();
				}
				numRecordsIn = tmpNumRecordsIn;
			}

			this.streamStatusProvider = streamStatusProvider;
			this.outputTag = outputTag;
		}

		@Override
		public void collect(StreamRecord<T> record) {
			if (this.outputTag != null) {
				// we are only responsible for emitting to the main input
				return;
			}

			pushToOperator(record);
		}

		@Override
		public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> record) {
			if (this.outputTag == null || !this.outputTag.equals(outputTag)) {
				// we are only responsible for emitting to the side-output specified by our
				// OutputTag.
				return;
			}

			pushToOperator(record);
		}

		protected <X> void pushToOperator(StreamRecord<X> record) {
			try {
				// we know that the given outputTag matches our OutputTag so the record
				// must be of the type that our operator expects.
				@SuppressWarnings("unchecked")
				StreamRecord<T> castRecord = (StreamRecord<T>) record;

				numRecordsIn.inc();
				operator.setKeyContextElement1(castRecord);
				operator.processElement(castRecord);
			}
			catch (Exception e) {
				throw new ExceptionInChainedOperatorException(e);
			}
		}

		@Override
		public void emitWatermark(Watermark mark) {
			try {
				watermarkGauge.setCurrentWatermark(mark.getTimestamp());
				if (streamStatusProvider.getStreamStatus().isActive()) {
					operator.processWatermark(mark);
				}
			}
			catch (Exception e) {
				throw new ExceptionInChainedOperatorException(e);
			}
		}

		@Override
		public void emitLatencyMarker(LatencyMarker latencyMarker) {
			try {
				operator.processLatencyMarker(latencyMarker);
			}
			catch (Exception e) {
				throw new ExceptionInChainedOperatorException(e);
			}
		}

		@Override
		public void close() {
			try {
				operator.close();
			}
			catch (Exception e) {
				throw new ExceptionInChainedOperatorException(e);
			}
		}

		@Override
		public Gauge<Long> getWatermarkGauge() {
			return watermarkGauge;
		}
	}

	private static final class CopyingChainingOutput<T> extends ChainingOutput<T> {

		private final TypeSerializer<T> serializer;

		public CopyingChainingOutput(
				OneInputStreamOperator<T, ?> operator,
				TypeSerializer<T> serializer,
				OutputTag<T> outputTag,
				StreamStatusProvider streamStatusProvider) {
			super(operator, streamStatusProvider, outputTag);
			this.serializer = serializer;
		}

		@Override
		public void collect(StreamRecord<T> record) {
			if (this.outputTag != null) {
				// we are only responsible for emitting to the main input
				return;
			}

			pushToOperator(record);
		}

		@Override
		public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> record) {
			if (this.outputTag == null || !this.outputTag.equals(outputTag)) {
				// we are only responsible for emitting to the side-output specified by our
				// OutputTag.
				return;
			}

			pushToOperator(record);
		}

		@Override
		protected <X> void pushToOperator(StreamRecord<X> record) {
			try {
				// we know that the given outputTag matches our OutputTag so the record
				// must be of the type that our operator (and Serializer) expects.
				@SuppressWarnings("unchecked")
				StreamRecord<T> castRecord = (StreamRecord<T>) record;

				numRecordsIn.inc();
				StreamRecord<T> copy = castRecord.copy(serializer.copy(castRecord.getValue()));
				operator.setKeyContextElement1(copy);
				operator.processElement(copy);
			} catch (ClassCastException e) {
				if (outputTag != null) {
					// Enrich error message
					ClassCastException replace = new ClassCastException(
						String.format(
							"%s. Failed to push OutputTag with id '%s' to operator. " +
								"This can occur when multiple OutputTags with different types " +
								"but identical names are being used.",
							e.getMessage(),
							outputTag.getId()));

					throw new ExceptionInChainedOperatorException(replace);
				} else {
					throw new ExceptionInChainedOperatorException(e);
				}
			} catch (Exception e) {
				throw new ExceptionInChainedOperatorException(e);
			}
		}
	}

	private static class BroadcastingOutputCollector<T> implements WatermarkGaugeExposingOutput<StreamRecord<T>> {

		protected final Output<StreamRecord<T>>[] outputs;

		private final Random random = new XORShiftRandom();

		private final StreamStatusProvider streamStatusProvider;

		private final WatermarkGauge watermarkGauge = new WatermarkGauge();

		public BroadcastingOutputCollector(
				Output<StreamRecord<T>>[] outputs,
				StreamStatusProvider streamStatusProvider) {
			this.outputs = outputs;
			this.streamStatusProvider = streamStatusProvider;
		}

		@Override
		public void emitWatermark(Watermark mark) {
			watermarkGauge.setCurrentWatermark(mark.getTimestamp());
			if (streamStatusProvider.getStreamStatus().isActive()) {
				for (Output<StreamRecord<T>> output : outputs) {
					output.emitWatermark(mark);
				}
			}
		}

		@Override
		public void emitLatencyMarker(LatencyMarker latencyMarker) {
			if (outputs.length <= 0) {
				// ignore
			} else if (outputs.length == 1) {
				outputs[0].emitLatencyMarker(latencyMarker);
			} else {
				// randomly select an output
				outputs[random.nextInt(outputs.length)].emitLatencyMarker(latencyMarker);
			}
		}

		@Override
		public Gauge<Long> getWatermarkGauge() {
			return watermarkGauge;
		}

		@Override
		public void collect(StreamRecord<T> record) {
			for (Output<StreamRecord<T>> output : outputs) {
				output.collect(record);
			}
		}

		@Override
		public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> record) {
			for (Output<StreamRecord<T>> output : outputs) {
				output.collect(outputTag, record);
			}
		}

		@Override
		public void close() {
			for (Output<StreamRecord<T>> output : outputs) {
				output.close();
			}
		}
	}

	/**
	 * Special version of {@link BroadcastingOutputCollector} that performs a shallow copy of the
	 * {@link StreamRecord} to ensure that multi-chaining works correctly.
	 */
	private static final class CopyingBroadcastingOutputCollector<T> extends BroadcastingOutputCollector<T> {

		public CopyingBroadcastingOutputCollector(
				Output<StreamRecord<T>>[] outputs,
				StreamStatusProvider streamStatusProvider) {
			super(outputs, streamStatusProvider);
		}

		@Override
		public void collect(StreamRecord<T> record) {

			for (int i = 0; i < outputs.length - 1; i++) {
				Output<StreamRecord<T>> output = outputs[i];
				StreamRecord<T> shallowCopy = record.copy(record.getValue());
				output.collect(shallowCopy);
			}

			if (outputs.length > 0) {
				// don't copy for the last output
				outputs[outputs.length - 1].collect(record);
			}
		}

		@Override
		public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> record) {
			for (int i = 0; i < outputs.length - 1; i++) {
				Output<StreamRecord<T>> output = outputs[i];

				StreamRecord<X> shallowCopy = record.copy(record.getValue());
				output.collect(outputTag, shallowCopy);
			}

			if (outputs.length > 0) {
				// don't copy for the last output
				outputs[outputs.length - 1].collect(outputTag, record);
			}
		}
	}
}
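
// Hedged note (not part of the original file): which broadcasting collector the chain picks is
// driven by the object-reuse setting checked in createOutputCollector() above. With object reuse
// enabled the chained outputs do not copy, so CopyingBroadcastingOutputCollector shallow-copies
// for all but the last consumer; with reuse disabled the plain BroadcastingOutputCollector hands
// the same record to every consumer. From the user side this is toggled via the ExecutionConfig:
//
//   StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//   env.getConfig().enableObjectReuse();   // chain will use the copying broadcast collector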




