All downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.io.network.partition.BufferWritingResultPartition Maven / Gradle / Ivy

There is a newer version: 1.19.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.io.network.partition;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.checkpoint.CheckpointException;
import org.apache.flink.runtime.event.AbstractEvent;
import org.apache.flink.runtime.io.network.api.serialization.EventSerializer;
import org.apache.flink.runtime.io.network.buffer.BufferBuilder;
import org.apache.flink.runtime.io.network.buffer.BufferCompressor;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
import org.apache.flink.runtime.io.network.buffer.BufferPool;
import org.apache.flink.runtime.metrics.TimerGauge;
import org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup;
import org.apache.flink.util.function.SupplierWithException;

import javax.annotation.Nullable;

import java.io.IOException;
import java.nio.ByteBuffer;

import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkElementIndex;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * A {@link ResultPartition} which writes buffers directly to {@link ResultSubpartition}s. This is
 * in contrast to implementations where records are written to a joint structure, from which the
 * subpartitions draw the data after the write phase is finished, for example the sort-based
 * partitioning.
 *
 * 

To avoid confusion: On the read side, all subpartitions return buffers (and backlog) to be * transported through the network. */ public abstract class BufferWritingResultPartition extends ResultPartition { /** The subpartitions of this partition. At least one. */ protected final ResultSubpartition[] subpartitions; /** * For non-broadcast mode, each subpartition maintains a separate BufferBuilder which might be * null. */ private final BufferBuilder[] unicastBufferBuilders; /** For broadcast mode, a single BufferBuilder is shared by all subpartitions. */ private BufferBuilder broadcastBufferBuilder; private TimerGauge hardBackPressuredTimeMsPerSecond = new TimerGauge(); private long totalWrittenBytes; public BufferWritingResultPartition( String owningTaskName, int partitionIndex, ResultPartitionID partitionId, ResultPartitionType partitionType, ResultSubpartition[] subpartitions, int numTargetKeyGroups, ResultPartitionManager partitionManager, @Nullable BufferCompressor bufferCompressor, SupplierWithException bufferPoolFactory) { super( owningTaskName, partitionIndex, partitionId, partitionType, subpartitions.length, numTargetKeyGroups, partitionManager, bufferCompressor, bufferPoolFactory); this.subpartitions = checkNotNull(subpartitions); this.unicastBufferBuilders = new BufferBuilder[subpartitions.length]; } @Override protected void setupInternal() throws IOException { checkState( bufferPool.getNumberOfRequiredMemorySegments() >= getNumberOfSubpartitions(), "Bug in result partition setup logic: Buffer pool has not enough guaranteed buffers for" + " this result partition."); } @Override public int getNumberOfQueuedBuffers() { int totalBuffers = 0; for (ResultSubpartition subpartition : subpartitions) { totalBuffers += subpartition.unsynchronizedGetNumberOfQueuedBuffers(); } return totalBuffers; } @Override public long getSizeOfQueuedBuffersUnsafe() { long totalNumberOfBytes = 0; for (ResultSubpartition subpartition : subpartitions) { totalNumberOfBytes += 
Math.max(0, subpartition.getTotalNumberOfBytesUnsafe()); } return totalWrittenBytes - totalNumberOfBytes; } @Override public int getNumberOfQueuedBuffers(int targetSubpartition) { checkArgument(targetSubpartition >= 0 && targetSubpartition < numSubpartitions); return subpartitions[targetSubpartition].unsynchronizedGetNumberOfQueuedBuffers(); } protected void flushSubpartition(int targetSubpartition, boolean finishProducers) { if (finishProducers) { finishBroadcastBufferBuilder(); finishUnicastBufferBuilder(targetSubpartition); } subpartitions[targetSubpartition].flush(); } protected void flushAllSubpartitions(boolean finishProducers) { if (finishProducers) { finishBroadcastBufferBuilder(); finishUnicastBufferBuilders(); } for (ResultSubpartition subpartition : subpartitions) { subpartition.flush(); } } @Override public void emitRecord(ByteBuffer record, int targetSubpartition) throws IOException { totalWrittenBytes += record.remaining(); BufferBuilder buffer = appendUnicastDataForNewRecord(record, targetSubpartition); while (record.hasRemaining()) { // full buffer, partial record finishUnicastBufferBuilder(targetSubpartition); buffer = appendUnicastDataForRecordContinuation(record, targetSubpartition); } if (buffer.isFull()) { // full buffer, full record finishUnicastBufferBuilder(targetSubpartition); } // partial buffer, full record } @Override public void broadcastRecord(ByteBuffer record) throws IOException { totalWrittenBytes += ((long) record.remaining() * numSubpartitions); BufferBuilder buffer = appendBroadcastDataForNewRecord(record); while (record.hasRemaining()) { // full buffer, partial record finishBroadcastBufferBuilder(); buffer = appendBroadcastDataForRecordContinuation(record); } if (buffer.isFull()) { // full buffer, full record finishBroadcastBufferBuilder(); } // partial buffer, full record } @Override public void broadcastEvent(AbstractEvent event, boolean isPriorityEvent) throws IOException { checkInProduceState(); 
finishBroadcastBufferBuilder(); finishUnicastBufferBuilders(); try (BufferConsumer eventBufferConsumer = EventSerializer.toBufferConsumer(event, isPriorityEvent)) { totalWrittenBytes += ((long) eventBufferConsumer.getWrittenBytes() * numSubpartitions); for (ResultSubpartition subpartition : subpartitions) { // Retain the buffer so that it can be recycled by each channel of targetPartition subpartition.add(eventBufferConsumer.copy(), 0); } } } @Override public void alignedBarrierTimeout(long checkpointId) throws IOException { for (ResultSubpartition subpartition : subpartitions) { subpartition.alignedBarrierTimeout(checkpointId); } } @Override public void abortCheckpoint(long checkpointId, CheckpointException cause) { for (ResultSubpartition subpartition : subpartitions) { subpartition.abortCheckpoint(checkpointId, cause); } } @Override public void setMetricGroup(TaskIOMetricGroup metrics) { super.setMetricGroup(metrics); hardBackPressuredTimeMsPerSecond = metrics.getHardBackPressuredTimePerSecond(); } @Override public ResultSubpartitionView createSubpartitionView( int subpartitionIndex, BufferAvailabilityListener availabilityListener) throws IOException { checkElementIndex(subpartitionIndex, numSubpartitions, "Subpartition not found."); checkState(!isReleased(), "Partition released."); ResultSubpartition subpartition = subpartitions[subpartitionIndex]; ResultSubpartitionView readView = subpartition.createReadView(availabilityListener); LOG.debug("Created {}", readView); return readView; } @Override public void finish() throws IOException { finishBroadcastBufferBuilder(); finishUnicastBufferBuilders(); for (ResultSubpartition subpartition : subpartitions) { subpartition.finish(); } super.finish(); } @Override protected void releaseInternal() { // Release all subpartitions for (ResultSubpartition subpartition : subpartitions) { try { subpartition.release(); } // Catch this in order to ensure that release is called on all subpartitions catch (Throwable t) { 
LOG.error("Error during release of result subpartition: " + t.getMessage(), t); } } } @Override public void close() { // We can not close these buffers in the release method because of the potential race // condition. This close method will be only called from the Task thread itself. if (broadcastBufferBuilder != null) { broadcastBufferBuilder.close(); broadcastBufferBuilder = null; } for (int i = 0; i < unicastBufferBuilders.length; ++i) { if (unicastBufferBuilders[i] != null) { unicastBufferBuilders[i].close(); unicastBufferBuilders[i] = null; } } super.close(); } private BufferBuilder appendUnicastDataForNewRecord( final ByteBuffer record, final int targetSubpartition) throws IOException { if (targetSubpartition < 0 || targetSubpartition > unicastBufferBuilders.length) { throw new ArrayIndexOutOfBoundsException(targetSubpartition); } BufferBuilder buffer = unicastBufferBuilders[targetSubpartition]; if (buffer == null) { buffer = requestNewUnicastBufferBuilder(targetSubpartition); addToSubpartition(buffer, targetSubpartition, 0, record.remaining()); } buffer.appendAndCommit(record); return buffer; } private void addToSubpartition( BufferBuilder buffer, int targetSubpartition, int partialRecordLength, int minDesirableBufferSize) throws IOException { int desirableBufferSize = subpartitions[targetSubpartition].add( buffer.createBufferConsumerFromBeginning(), partialRecordLength); resizeBuffer(buffer, desirableBufferSize, minDesirableBufferSize); } private void resizeBuffer( BufferBuilder buffer, int desirableBufferSize, int minDesirableBufferSize) { if (desirableBufferSize > 0) { // !! If some of partial data has written already to this buffer, the result size can // not be less than written value. 
buffer.trim(Math.max(minDesirableBufferSize, desirableBufferSize)); } } private BufferBuilder appendUnicastDataForRecordContinuation( final ByteBuffer remainingRecordBytes, final int targetSubpartition) throws IOException { final BufferBuilder buffer = requestNewUnicastBufferBuilder(targetSubpartition); // !! Be aware, in case of partialRecordBytes != 0, partial length and data has to // `appendAndCommit` first // before consumer is created. Otherwise it would be confused with the case the buffer // starting // with a complete record. // !! The next two lines can not change order. final int partialRecordBytes = buffer.appendAndCommit(remainingRecordBytes); addToSubpartition(buffer, targetSubpartition, partialRecordBytes, partialRecordBytes); return buffer; } private BufferBuilder appendBroadcastDataForNewRecord(final ByteBuffer record) throws IOException { BufferBuilder buffer = broadcastBufferBuilder; if (buffer == null) { buffer = requestNewBroadcastBufferBuilder(); createBroadcastBufferConsumers(buffer, 0, record.remaining()); } buffer.appendAndCommit(record); return buffer; } private BufferBuilder appendBroadcastDataForRecordContinuation( final ByteBuffer remainingRecordBytes) throws IOException { final BufferBuilder buffer = requestNewBroadcastBufferBuilder(); // !! Be aware, in case of partialRecordBytes != 0, partial length and data has to // `appendAndCommit` first // before consumer is created. Otherwise it would be confused with the case the buffer // starting // with a complete record. // !! The next two lines can not change order. 
final int partialRecordBytes = buffer.appendAndCommit(remainingRecordBytes); createBroadcastBufferConsumers(buffer, partialRecordBytes, partialRecordBytes); return buffer; } private void createBroadcastBufferConsumers( BufferBuilder buffer, int partialRecordBytes, int minDesirableBufferSize) throws IOException { try (final BufferConsumer consumer = buffer.createBufferConsumerFromBeginning()) { int desirableBufferSize = Integer.MAX_VALUE; for (ResultSubpartition subpartition : subpartitions) { int subPartitionBufferSize = subpartition.add(consumer.copy(), partialRecordBytes); desirableBufferSize = subPartitionBufferSize > 0 ? Math.min(desirableBufferSize, subPartitionBufferSize) : desirableBufferSize; } resizeBuffer(buffer, desirableBufferSize, minDesirableBufferSize); } } private BufferBuilder requestNewUnicastBufferBuilder(int targetSubpartition) throws IOException { checkInProduceState(); ensureUnicastMode(); final BufferBuilder bufferBuilder = requestNewBufferBuilderFromPool(targetSubpartition); unicastBufferBuilders[targetSubpartition] = bufferBuilder; return bufferBuilder; } private BufferBuilder requestNewBroadcastBufferBuilder() throws IOException { checkInProduceState(); ensureBroadcastMode(); final BufferBuilder bufferBuilder = requestNewBufferBuilderFromPool(0); broadcastBufferBuilder = bufferBuilder; return bufferBuilder; } private BufferBuilder requestNewBufferBuilderFromPool(int targetSubpartition) throws IOException { BufferBuilder bufferBuilder = bufferPool.requestBufferBuilder(targetSubpartition); if (bufferBuilder != null) { return bufferBuilder; } hardBackPressuredTimeMsPerSecond.markStart(); try { bufferBuilder = bufferPool.requestBufferBuilderBlocking(targetSubpartition); hardBackPressuredTimeMsPerSecond.markEnd(); return bufferBuilder; } catch (InterruptedException e) { throw new IOException("Interrupted while waiting for buffer"); } } private void finishUnicastBufferBuilder(int targetSubpartition) { final BufferBuilder bufferBuilder = 
unicastBufferBuilders[targetSubpartition]; if (bufferBuilder != null) { int bytes = bufferBuilder.finish(); resultPartitionBytes.inc(targetSubpartition, bytes); numBytesOut.inc(bytes); numBuffersOut.inc(); unicastBufferBuilders[targetSubpartition] = null; bufferBuilder.close(); } } private void finishUnicastBufferBuilders() { for (int channelIndex = 0; channelIndex < numSubpartitions; channelIndex++) { finishUnicastBufferBuilder(channelIndex); } } private void finishBroadcastBufferBuilder() { if (broadcastBufferBuilder != null) { int bytes = broadcastBufferBuilder.finish(); resultPartitionBytes.incAll(bytes); numBytesOut.inc(bytes * numSubpartitions); numBuffersOut.inc(numSubpartitions); broadcastBufferBuilder.close(); broadcastBufferBuilder = null; } } private void ensureUnicastMode() { finishBroadcastBufferBuilder(); } private void ensureBroadcastMode() { finishUnicastBufferBuilders(); } @VisibleForTesting public TimerGauge getHardBackPressuredTimeMsPerSecond() { return hardBackPressuredTimeMsPerSecond; } @VisibleForTesting public ResultSubpartition[] getAllPartitions() { return subpartitions; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy