All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.operators.GroupReduceCombineDriver Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.operators;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.memory.MemoryManager;
import org.apache.flink.runtime.operators.sort.FixedLengthRecordSorter;
import org.apache.flink.runtime.operators.sort.InMemorySorter;
import org.apache.flink.runtime.operators.sort.NormalizedKeySorter;
import org.apache.flink.runtime.operators.sort.QuickSort;
import org.apache.flink.runtime.util.NonReusingKeyGroupedIterator;
import org.apache.flink.runtime.util.ReusingKeyGroupedIterator;
import org.apache.flink.util.Collector;
import org.apache.flink.util.MutableObjectIterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.List;

/**
 * Non-chained combine driver which is used for a CombineGroup transformation or a GroupReduce
 * transformation where the user supplied a RichGroupReduceFunction with a combine method. The
 * combining is performed in memory with a lazy approach which only combines elements which
 * currently fit in the sorter. This may lead to a partial solution. In the case of the
 * RichGroupReduceFunction this partial result is transformed into a proper deterministic result.
 * The CombineGroup uses the GroupCombineFunction interface which allows to combine values of type
 * {@code IN} to any type of type {@code OUT}. In contrast, the RichGroupReduceFunction requires the
 * combine method to have the same input and output type to be able to reduce the elements after the
 * combine from {@code IN} to {@code OUT}.
 *
 * 

The GroupReduceCombineDriver uses a combining iterator over its input. The output of the * iterator is emitted. * * @param The data type consumed by the combiner. * @param The data type produced by the combiner. */ public class GroupReduceCombineDriver implements Driver, OUT> { private static final Logger LOG = LoggerFactory.getLogger(GroupReduceCombineDriver.class); /** * Fix length records with a length below this threshold will be in-place sorted, if possible. */ private static final int THRESHOLD_FOR_IN_PLACE_SORTING = 32; private TaskContext, OUT> taskContext; private InMemorySorter sorter; private GroupCombineFunction combiner; private TypeSerializer serializer; private TypeComparator groupingComparator; private QuickSort sortAlgo = new QuickSort(); private Collector output; private List memory; private long oversizedRecordCount; private volatile boolean running = true; private boolean objectReuseEnabled = false; // ------------------------------------------------------------------------ @Override public void setup(TaskContext, OUT> context) { this.taskContext = context; this.running = true; } @Override public int getNumberOfInputs() { return 1; } @Override public Class> getStubType() { @SuppressWarnings("unchecked") final Class> clazz = (Class>) (Class) GroupCombineFunction.class; return clazz; } @Override public int getNumberOfDriverComparators() { return 2; } @Override public void prepare() throws Exception { final DriverStrategy driverStrategy = this.taskContext.getTaskConfig().getDriverStrategy(); if (driverStrategy != DriverStrategy.SORTED_GROUP_COMBINE) { throw new Exception( "Invalid strategy " + driverStrategy + " for group reduce combiner."); } final TypeSerializerFactory serializerFactory = this.taskContext.getInputSerializer(0); this.serializer = serializerFactory.getSerializer(); final TypeComparator sortingComparator = this.taskContext.getDriverComparator(0); this.groupingComparator = this.taskContext.getDriverComparator(1); this.combiner = this.taskContext.getStub(); this.output = this.taskContext.getOutputCollector(); MemoryManager memManager = this.taskContext.getMemoryManager(); final int numMemoryPages = memManager.computeNumberOfPages( this.taskContext.getTaskConfig().getRelativeMemoryDriver()); this.memory = memManager.allocatePages(this.taskContext.getContainingTask(), numMemoryPages); // instantiate a fix-length in-place sorter, if possible, otherwise the out-of-place sorter if (sortingComparator.supportsSerializationWithKeyNormalization() && this.serializer.getLength() > 0 && this.serializer.getLength() <= THRESHOLD_FOR_IN_PLACE_SORTING) { this.sorter = new FixedLengthRecordSorter( this.serializer, sortingComparator.duplicate(), memory); } else { this.sorter = new NormalizedKeySorter( this.serializer, sortingComparator.duplicate(), memory); } ExecutionConfig executionConfig = taskContext.getExecutionConfig(); this.objectReuseEnabled = executionConfig.isObjectReuseEnabled(); if (LOG.isDebugEnabled()) { LOG.debug( "GroupReduceCombineDriver object reuse: {}.", (this.objectReuseEnabled ? "ENABLED" : "DISABLED")); } } @Override public void run() throws Exception { if (LOG.isDebugEnabled()) { LOG.debug("Combiner starting."); } final MutableObjectIterator in = this.taskContext.getInput(0); final TypeSerializer serializer = this.serializer; if (objectReuseEnabled) { IN value = serializer.createInstance(); while (running && (value = in.next(value)) != null) { // try writing to the sorter first if (this.sorter.write(value)) { continue; } // do the actual sorting, combining, and data writing sortAndCombineAndRetryWrite(value); } } else { IN value; while (running && (value = in.next()) != null) { // try writing to the sorter first if (this.sorter.write(value)) { continue; } // do the actual sorting, combining, and data writing sortAndCombineAndRetryWrite(value); } } // sort, combine, and send the final batch if (running) { sortAndCombine(); } } private void sortAndCombine() throws Exception { if (sorter.isEmpty()) { return; } final InMemorySorter sorter = this.sorter; this.sortAlgo.sort(sorter); final GroupCombineFunction combiner = this.combiner; final Collector output = this.output; // iterate over key groups if (objectReuseEnabled) { final ReusingKeyGroupedIterator keyIter = new ReusingKeyGroupedIterator( sorter.getIterator(), this.serializer, this.groupingComparator); while (this.running && keyIter.nextKey()) { combiner.combine(keyIter.getValues(), output); } } else { final NonReusingKeyGroupedIterator keyIter = new NonReusingKeyGroupedIterator( sorter.getIterator(), this.groupingComparator); while (this.running && keyIter.nextKey()) { combiner.combine(keyIter.getValues(), output); } } } private void sortAndCombineAndRetryWrite(IN value) throws Exception { sortAndCombine(); this.sorter.reset(); // write the value again if (!this.sorter.write(value)) { ++oversizedRecordCount; LOG.debug( "Cannot write record to fresh sort buffer, record is too large. " + "Oversized record count: {}", oversizedRecordCount); // simply forward the record. We need to pass it through the combine function to convert // it Iterable input = Collections.singleton(value); this.combiner.combine(input, this.output); this.sorter.reset(); } } @Override public void cleanup() throws Exception { if (this.sorter != null) { this.sorter.dispose(); } this.taskContext.getMemoryManager().release(this.memory); } @Override public void cancel() { this.running = false; if (this.sorter != null) { try { this.sorter.dispose(); } catch (Exception e) { // may happen during concurrent modification } } this.taskContext.getMemoryManager().release(this.memory); } /** * Gets the number of oversized records handled by this combiner. * * @return The number of oversized records handled by this combiner. */ public long getOversizedRecordCount() { return oversizedRecordCount; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy