All downloads are free. The search and download features use the official Maven repository.

org.apache.flink.runtime.operators.chaining.GroupCombineChainedDriver Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.operators.chaining;

import org.apache.flink.api.common.functions.Function;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.util.FunctionUtils;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeComparatorFactory;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.memorymanager.MemoryManager;
import org.apache.flink.runtime.operators.RegularPactTask;
import org.apache.flink.runtime.operators.sort.FixedLengthRecordSorter;
import org.apache.flink.runtime.operators.sort.InMemorySorter;
import org.apache.flink.runtime.operators.sort.NormalizedKeySorter;
import org.apache.flink.runtime.operators.sort.QuickSort;
import org.apache.flink.runtime.util.NonReusingKeyGroupedIterator;
import org.apache.flink.runtime.util.ReusingKeyGroupedIterator;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;

/**
 * Chained variant of the GroupCombineDriver.
 *
 * <p>Acts like a combiner with a custom output type OUT.
 *
 * <p>Sorting and reducing of the elements is performed individually for each partition without data exchange.
 * This may lead to a partial group reduce.
 *
 * <p>Records handed to {@link #collect(Object)} are buffered in an in-memory sorter. Whenever the sorter
 * cannot take another record, and once more on {@link #close()}, the buffered records are sorted, grouped
 * and passed to the user's {@link GroupReduceFunction}.
 *
 * @param <IN> The data type consumed
 * @param <OUT> The data type produced
 */
public class GroupCombineChainedDriver<IN, OUT> extends ChainedDriver<IN, OUT> {

	private static final Logger LOG = LoggerFactory.getLogger(GroupCombineChainedDriver.class);

	/**
	 * Fixed-length records whose serialized length is at most this many bytes (inclusive) are sorted
	 * in place, provided the sorting comparator supports serialization with key normalization.
	 */
	private static final int THRESHOLD_FOR_IN_PLACE_SORTING = 32;

	// --------------------------------------------------------------------------------------------

	/** Buffer holding the records collected since the last sort-and-reduce pass. */
	private InMemorySorter<IN> sorter;

	/** The user function applied to every key group. */
	private GroupReduceFunction<IN, OUT> reducer;

	private TypeSerializer<IN> serializer;

	/** Comparator that defines the order in which the sorter arranges the records. */
	private TypeComparator<IN> sortingComparator;

	/** Comparator that decides which consecutive sorted records form one group. */
	private TypeComparator<IN> groupingComparator;

	private AbstractInvokable parent;

	private final QuickSort sortAlgo = new QuickSort();

	private MemoryManager memManager;

	/** Cleared by {@link #cancelTask()}; volatile because it is read inside the reduce loop. */
	private volatile boolean running = true;

	// --------------------------------------------------------------------------------------------

	/**
	 * Remembers the owning task, instantiates the user's {@link GroupReduceFunction} from the task
	 * configuration and hands it the UDF runtime context.
	 *
	 * @param parent the task this driver is chained to
	 */
	@Override
	public void setup(AbstractInvokable parent) {
		this.parent = parent;

		@SuppressWarnings("unchecked")
		final GroupReduceFunction<IN, OUT> combiner =
			RegularPactTask.instantiateUserCode(this.config, userCodeClassLoader, GroupReduceFunction.class);
		this.reducer = combiner;
		FunctionUtils.setFunctionRuntimeContext(combiner, getUdfRuntimeContext());
	}

	/**
	 * Opens the user function, then allocates the driver's share of managed memory and builds the
	 * in-memory sorter on top of it.
	 *
	 * @throws Exception if opening the user code or setting up the sorter fails
	 */
	@Override
	public void openTask() throws Exception {
		// open the stub first
		final Configuration stubConfig = this.config.getStubParameters();
		RegularPactTask.openUserCode(this.reducer, stubConfig);

		// ----------------- Set up the in-memory sorter -------------------------

		this.memManager = this.parent.getEnvironment().getMemoryManager();
		final int numMemoryPages = memManager.computeNumberOfPages(this.config.getRelativeMemoryDriver());

		// instantiate the serializer / comparator
		final TypeSerializerFactory<IN> serializerFactory = this.config.getInputSerializer(0, this.userCodeClassLoader);
		final TypeComparatorFactory<IN> sortingComparatorFactory = this.config.getDriverComparator(0, this.userCodeClassLoader);
		final TypeComparatorFactory<IN> groupingComparatorFactory = this.config.getDriverComparator(1, this.userCodeClassLoader);
		this.serializer = serializerFactory.getSerializer();
		this.sortingComparator = sortingComparatorFactory.createComparator();
		this.groupingComparator = groupingComparatorFactory.createComparator();

		final List<MemorySegment> memory = this.memManager.allocatePages(this.parent, numMemoryPages);

		// instantiate a fix-length in-place sorter, if possible, otherwise the out-of-place sorter
		if (this.sortingComparator.supportsSerializationWithKeyNormalization() &&
			this.serializer.getLength() > 0 && this.serializer.getLength() <= THRESHOLD_FOR_IN_PLACE_SORTING)
		{
			this.sorter = new FixedLengthRecordSorter<IN>(this.serializer, this.sortingComparator, memory);
		} else {
			this.sorter = new NormalizedKeySorter<IN>(this.serializer, this.sortingComparator.duplicate(), memory);
		}

		LOG.debug("GroupCombineChainedDriver object reuse: {}.", this.objectReuseEnabled ? "ENABLED" : "DISABLED");
	}

	/**
	 * Returns the sorter's memory to the memory manager and, unless the task was canceled, closes
	 * the user function.
	 *
	 * @throws Exception if closing the user code fails
	 */
	@Override
	public void closeTask() throws Exception {
		releaseSorterMemory();

		if (!this.running) {
			return;
		}

		RegularPactTask.closeUserCode(this.reducer);
	}

	/**
	 * Marks the driver as no longer running, which stops the reduce loop at its next key group, and
	 * returns the sorter's memory to the memory manager.
	 */
	@Override
	public void cancelTask() {
		this.running = false;
		releaseSorterMemory();
	}

	/**
	 * Disposes the sorter and releases its memory segments.
	 *
	 * <p>Does nothing when the sorter or memory manager was never created, which is the case when the
	 * task is canceled or closed before {@link #openTask()} has completed.
	 */
	private void releaseSorterMemory() {
		final InMemorySorter<IN> currentSorter = this.sorter;
		final MemoryManager manager = this.memManager;
		if (currentSorter != null && manager != null) {
			manager.release(currentSorter.dispose());
		}
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * @return the user function instantiated in {@link #setup(AbstractInvokable)}
	 */
	public Function getStub() {
		return this.reducer;
	}

	/**
	 * @return the name of this chained task
	 */
	public String getTaskName() {
		return this.taskName;
	}

	/**
	 * Buffers the record in the sorter. If the sorter rejects it, the buffered records are sorted
	 * and reduced, the sorter is reset, and the record is written into the now empty buffer.
	 *
	 * @param record the record to add
	 * @throws ExceptionInChainedStubException if writing, sorting or reducing fails, or if the record
	 *         does not fit even into an empty sort buffer
	 */
	@Override
	public void collect(IN record) {
		// try writing to the sorter first
		try {
			if (this.sorter.write(record)) {
				return;
			}
		} catch (IOException e) {
			throw new ExceptionInChainedStubException(this.taskName, e);
		}

		// the buffer did not accept the record: combine what has been collected so far
		try {
			sortAndReduce();
		} catch (Exception e) {
			throw new ExceptionInChainedStubException(this.taskName, e);
		}
		this.sorter.reset();

		// retry on the freshly reset buffer
		try {
			if (!this.sorter.write(record)) {
				throw new IOException("Cannot write record to fresh sort buffer. Record too large.");
			}
		} catch (IOException e) {
			throw new ExceptionInChainedStubException(this.taskName, e);
		}
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Sorts and reduces whatever is still buffered, then closes the output collector.
	 *
	 * @throws ExceptionInChainedStubException if sorting or reducing fails
	 */
	@Override
	public void close() {
		try {
			sortAndReduce();
		} catch (Exception e) {
			throw new ExceptionInChainedStubException(this.taskName, e);
		}

		this.outputCollector.close();
	}

	/**
	 * Sorts the buffered records and calls the user function once per key group, emitting to the
	 * output collector. Does nothing when the sorter is empty. The loop ends early once the driver
	 * is no longer running.
	 *
	 * @throws Exception if the sorter iterator or the user function fails
	 */
	private void sortAndReduce() throws Exception {
		final InMemorySorter<IN> sorter = this.sorter;
		if (sorter.isEmpty()) {
			return;
		}

		this.sortAlgo.sort(sorter);

		// cache references on the stack
		final GroupReduceFunction<IN, OUT> stub = this.reducer;
		final Collector<OUT> output = this.outputCollector;

		if (this.objectReuseEnabled) {
			final ReusingKeyGroupedIterator<IN> keyIter =
				new ReusingKeyGroupedIterator<IN>(sorter.getIterator(), this.serializer, this.groupingComparator);

			// run stub implementation
			while (this.running && keyIter.nextKey()) {
				stub.reduce(keyIter.getValues(), output);
			}
		} else {
			final NonReusingKeyGroupedIterator<IN> keyIter =
				new NonReusingKeyGroupedIterator<IN>(sorter.getIterator(), this.groupingComparator);

			// run stub implementation
			while (this.running && keyIter.nextKey()) {
				stub.reduce(keyIter.getValues(), output);
			}
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy