All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.operators.DataSinkTask Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.operators;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.io.CleanupWhenUnsuccessful;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeComparatorFactory;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.SimpleCounter;
import org.apache.flink.runtime.execution.CancelTaskException;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.io.network.api.reader.MutableReader;
import org.apache.flink.runtime.io.network.api.reader.MutableRecordReader;
import org.apache.flink.runtime.io.network.partition.consumer.UnionInputGate;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.metrics.groups.OperatorIOMetricGroup;
import org.apache.flink.runtime.metrics.groups.OperatorMetricGroup;
import org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException;
import org.apache.flink.runtime.operators.sort.BlockSortedDataFileFactory;
import org.apache.flink.runtime.operators.sort.RecordComparisonMerger;
import org.apache.flink.runtime.operators.sort.SortedDataFileFactory;
import org.apache.flink.runtime.operators.sort.SortedDataFileMerger;
import org.apache.flink.runtime.operators.sort.UnilateralSortMerger;
import org.apache.flink.runtime.operators.util.CloseableInputProvider;
import org.apache.flink.runtime.operators.util.DistributedRuntimeUDFContext;
import org.apache.flink.runtime.operators.util.ReaderIterator;
import org.apache.flink.runtime.operators.util.TaskConfig;
import org.apache.flink.runtime.plugable.DeserializationDelegate;
import org.apache.flink.util.MutableObjectIterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * DataSinkTask which is executed by a task manager. The task hands the data to an output format.
 *
 * @see OutputFormat
 */
public class DataSinkTask extends AbstractInvokable {

	// Obtain DataSinkTask Logger
	private static final Logger LOG = LoggerFactory.getLogger(DataSinkTask.class);

	// --------------------------------------------------------------------------------------------

	// OutputFormat instance. volatile, because the asynchronous canceller may access it
	private volatile OutputFormat format;

	private MutableReader inputReader;

	// input reader
	private MutableObjectIterator reader;

	// The serializer for the input type
	private TypeSerializerFactory inputTypeSerializerFactory;

	// local strategy
	private CloseableInputProvider localStrategy;

	// task configuration
	private TaskConfig config;

	// cancel flag
	private volatile boolean taskCanceled;

	private volatile boolean cleanupCalled;

	/**
	 * Create an Invokable task and set its environment.
	 *
	 * @param environment The environment assigned to this invokable.
	 */
	public DataSinkTask(Environment environment) {
		super(environment);
	}

	@Override
	public void invoke() throws Exception {
		// --------------------------------------------------------------------
		// Initialize
		// --------------------------------------------------------------------
		LOG.debug(getLogString("Start registering input and output"));

		// initialize OutputFormat
		initOutputFormat();

		// initialize input readers
		try {
			initInputReaders();
		} catch (Exception e) {
			throw new RuntimeException("Initializing the input streams failed" +
					(e.getMessage() == null ? "." : ": " + e.getMessage()), e);
		}

		LOG.debug(getLogString("Finished registering input and output"));

		// --------------------------------------------------------------------
		// Invoke
		// --------------------------------------------------------------------
		LOG.debug(getLogString("Starting data sink operator"));

		RuntimeContext ctx = createRuntimeContext();

		final Counter numRecordsIn;
		{
			Counter tmpNumRecordsIn;
			try {
				OperatorIOMetricGroup ioMetricGroup = ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup();
				ioMetricGroup.reuseInputMetricsForTask();
				ioMetricGroup.reuseOutputMetricsForTask();
				tmpNumRecordsIn = ioMetricGroup.getNumRecordsInCounter();
			} catch (Exception e) {
				LOG.warn("An exception occurred during the metrics setup.", e);
				tmpNumRecordsIn = new SimpleCounter();
			}
			numRecordsIn = tmpNumRecordsIn;
		}

		if(RichOutputFormat.class.isAssignableFrom(this.format.getClass())){
			((RichOutputFormat) this.format).setRuntimeContext(ctx);
			LOG.debug(getLogString("Rich Sink detected. Initializing runtime context."));
		}

		ExecutionConfig executionConfig = getExecutionConfig();

		boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();

		try {
			// initialize local strategies
			MutableObjectIterator input1;
			switch (this.config.getInputLocalStrategy(0)) {
			case NONE:
				// nothing to do
				localStrategy = null;
				input1 = reader;
				break;
			case SORT:
				// initialize sort local strategy
				try {
					// get type comparator
					TypeComparatorFactory compFact = this.config.getInputComparator(0,
							getUserCodeClassLoader());
					if (compFact == null) {
						throw new Exception("Missing comparator factory for local strategy on input " + 0);
					}

					TypeComparator comparator = compFact.createComparator();
					SortedDataFileFactory sortedDataFileFactory = new BlockSortedDataFileFactory<>(
						getEnvironment().getIOManager().createChannelEnumerator(),
						this.inputTypeSerializerFactory.getSerializer(),
						getEnvironment().getIOManager());
					@SuppressWarnings({ "rawtypes", "unchecked" })
					SortedDataFileMerger merger = new RecordComparisonMerger(sortedDataFileFactory,
						getEnvironment().getIOManager(),
						this.inputTypeSerializerFactory.getSerializer(), comparator,
						this.config.getFilehandlesInput(0),
						this.getExecutionConfig().isObjectReuseEnabled());
					
					// initialize sorter
					UnilateralSortMerger sorter = new UnilateralSortMerger(
						sortedDataFileFactory, merger,
							getEnvironment().getMemoryManager(), 
							getEnvironment().getIOManager(),
							this.reader, this, this.inputTypeSerializerFactory, comparator,
							this.config.getRelativeMemoryInput(0), this.config.getFilehandlesInput(0),
							true,
							this.config.getSpillingThresholdInput(0),
							this.config.getUseLargeRecordHandler(),
							this.getExecutionConfig().isObjectReuseEnabled());

					this.localStrategy = sorter;
					input1 = sorter.getIterator();
				} catch (Exception e) {
					throw new RuntimeException("Initializing the input processing failed" +
							(e.getMessage() == null ? "." : ": " + e.getMessage()), e);
				}
				break;
			default:
				throw new RuntimeException("Invalid local strategy for DataSinkTask");
			}

			// read the reader and write it to the output

			final TypeSerializer serializer = this.inputTypeSerializerFactory.getSerializer();
			final MutableObjectIterator input = input1;
			final OutputFormat format = this.format;


			// check if task has been canceled
			if (this.taskCanceled) {
				return;
			}

			LOG.debug(getLogString("Starting to produce output"));

			// open
			format.open(this.getEnvironment().getTaskInfo().getIndexOfThisSubtask(), this.getEnvironment().getTaskInfo().getNumberOfParallelSubtasks());

			if (objectReuseEnabled) {
				IT record = serializer.createInstance();

				// work!
				while (!this.taskCanceled && ((record = input.next(record)) != null)) {
					numRecordsIn.inc();
					format.writeRecord(record);
				}
			} else {
				IT record;

				// work!
				while (!this.taskCanceled && ((record = input.next()) != null)) {
					numRecordsIn.inc();
					format.writeRecord(record);
				}
			}

			// close. We close here such that a regular close throwing an exception marks a task as failed.
			if (!this.taskCanceled) {
				this.format.close();
				this.format = null;
			}
		}
		catch (Exception ex) {

			// make a best effort to clean up
			try {
				if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
					cleanupCalled = true;
					((CleanupWhenUnsuccessful) format).tryCleanupOnError();
				}
			}
			catch (Throwable t) {
				LOG.error("Cleanup on error failed.", t);
			}

			ex = ExceptionInChainedStubException.exceptionUnwrap(ex);

			if (ex instanceof CancelTaskException) {
				// forward canceling exception
				throw ex;
			}
			// drop, if the task was canceled
			else if (!this.taskCanceled) {
				if (LOG.isErrorEnabled()) {
					LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
				}
				throw ex;
			}
		}
		finally {
			if (this.format != null) {
				// close format, if it has not been closed, yet.
				// This should only be the case if we had a previous error, or were canceled.
				try {
					this.format.close();
				}
				catch (Throwable t) {
					if (LOG.isWarnEnabled()) {
						LOG.warn(getLogString("Error closing the output format"), t);
					}
				}
			}
			// close local strategy if necessary
			if (localStrategy != null) {
				try {
					this.localStrategy.close();
				} catch (Throwable t) {
					LOG.error("Error closing local strategy", t);
				}
			}

			BatchTask.clearReaders(new MutableReader[]{inputReader});
		}

		if (!this.taskCanceled) {
			LOG.debug(getLogString("Finished data sink operator"));
		}
		else {
			LOG.debug(getLogString("Data sink operator cancelled"));
		}
	}

	@Override
	public void cancel() throws Exception {
		this.taskCanceled = true;
		OutputFormat format = this.format;
		if (format != null) {
			try {
				this.format.close();
			} catch (Throwable t) {}

			// make a best effort to clean up
			try {
				if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
					cleanupCalled = true;
					((CleanupWhenUnsuccessful) format).tryCleanupOnError();
				}
			}
			catch (Throwable t) {
				LOG.error("Cleanup on error failed.", t);
			}
		}

		LOG.debug(getLogString("Cancelling data sink operator"));
	}

	/**
	 * Initializes the OutputFormat implementation and configuration.
	 *
	 * @throws RuntimeException
	 *         Throws if instance of OutputFormat implementation can not be
	 *         obtained.
	 */
	private void initOutputFormat() {
		ClassLoader userCodeClassLoader = getUserCodeClassLoader();
		// obtain task configuration (including stub parameters)
		Configuration taskConf = getTaskConfiguration();
		this.config = new TaskConfig(taskConf);

		try {
			this.format = config.>getStubWrapper(userCodeClassLoader).getUserCodeObject(OutputFormat.class, userCodeClassLoader);

			// check if the class is a subclass, if the check is required
			if (!OutputFormat.class.isAssignableFrom(this.format.getClass())) {
				throw new RuntimeException("The class '" + this.format.getClass().getName() + "' is not a subclass of '" +
						OutputFormat.class.getName() + "' as is required.");
			}
		}
		catch (ClassCastException ccex) {
			throw new RuntimeException("The stub class is not a proper subclass of " + OutputFormat.class.getName(), ccex);
		}

		Thread thread = Thread.currentThread();
		ClassLoader original = thread.getContextClassLoader();
		// configure the stub. catch exceptions here extra, to report them as originating from the user code
		try {
			thread.setContextClassLoader(userCodeClassLoader);
			this.format.configure(this.config.getStubParameters());
		}
		catch (Throwable t) {
			throw new RuntimeException("The user defined 'configure()' method in the Output Format caused an error: "
				+ t.getMessage(), t);
		}
		finally {
			thread.setContextClassLoader(original);
		}
	}

	/**
	 * Initializes the input readers of the DataSinkTask.
	 *
	 * @throws RuntimeException
	 *         Thrown in case of invalid task input configuration.
	 */
	@SuppressWarnings("unchecked")
	private void initInputReaders() throws Exception {
		int numGates = 0;
		//  ---------------- create the input readers ---------------------
		// in case where a logical input unions multiple physical inputs, create a union reader
		final int groupSize = this.config.getGroupSize(0);
		numGates += groupSize;
		if (groupSize == 1) {
			// non-union case
			inputReader = new MutableRecordReader>(
					getEnvironment().getInputGate(0),
					getEnvironment().getTaskManagerInfo().getTmpDirectories(),
					getEnvironment().getTaskManagerInfo().getConfiguration());
		} else if (groupSize > 1){
			// union case
			inputReader = new MutableRecordReader<>(
					new UnionInputGate(getEnvironment().getAllInputGates()),
					getEnvironment().getTaskManagerInfo().getTmpDirectories(),
					getEnvironment().getTaskManagerInfo().getConfiguration());
		} else {
			throw new Exception("Illegal input group size in task configuration: " + groupSize);
		}

		this.inputTypeSerializerFactory = this.config.getInputSerializer(0, getUserCodeClassLoader());
		@SuppressWarnings({ "rawtypes" })
		final MutableObjectIterator iter = new ReaderIterator(inputReader, this.inputTypeSerializerFactory.getSerializer());
		this.reader = (MutableObjectIterator)iter;

		// final sanity check
		if (numGates != this.config.getNumInputs()) {
			throw new Exception("Illegal configuration: Number of input gates and group sizes are not consistent.");
		}
	}

	// ------------------------------------------------------------------------
	//                               Utilities
	// ------------------------------------------------------------------------

	/**
	 * Utility function that composes a string for logging purposes. The string includes the given message and
	 * the index of the task in its task group together with the number of tasks in the task group.
	 *
	 * @param message The main message for the log.
	 * @return The string ready for logging.
	 */
	private String getLogString(String message) {
		return BatchTask.constructLogString(message, this.getEnvironment().getTaskInfo().getTaskName(), this);
	}

	public DistributedRuntimeUDFContext createRuntimeContext() {
		Environment env = getEnvironment();

		return new DistributedRuntimeUDFContext(env.getTaskInfo(), getUserCodeClassLoader(),
				getExecutionConfig(), env.getDistributedCacheEntries(), env.getAccumulatorRegistry(),
				getEnvironment().getMetricGroup().addOperator(getEnvironment().getTaskInfo().getTaskName()));
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy