All downloads are free. The search and download features use the official Maven repository.

org.apache.flink.runtime.io.network.partition.BoundedBlockingSubpartition Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.io.network.partition;

import org.apache.flink.runtime.io.network.api.EndOfPartitionEvent;
import org.apache.flink.runtime.io.network.api.serialization.EventSerializer;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
import org.apache.flink.util.FlinkRuntimeException;

import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * An implementation of the ResultSubpartition for a bounded result transferred
 * in a blocking manner: The result is first produced, then consumed.
 * The result can be consumed possibly multiple times.
 *
 * 

Depending on the supplied implementation of {@link BoundedData}, the actual data is stored * for example in a file, or in a temporary memory mapped file. * *

Important Notes on Thread Safety

* *

This class does not synchronize every buffer access. It assumes the threading model of the * Flink network stack and is not thread-safe beyond that. * *

This class assumes a single writer thread that adds buffers, flushes, and finishes the write * phase. That same thread is also assumed to perform the partition release, if the release happens * during the write phase. * *

The implementation supports multiple concurrent readers, but assumes a single * thread per reader. That same thread must also release the reader. In particular, after the reader * was released, no buffers obtained from this reader may be accessed any more, or segmentation * faults might occur in some implementations. * *

The method calls to create readers, dispose readers, and dispose the partition are * thread-safe vis-a-vis each other. */ final class BoundedBlockingSubpartition extends ResultSubpartition { /** This lock guards the creation of readers and disposal of the memory mapped file. */ private final Object lock = new Object(); /** The current buffer, may be filled further over time. */ @Nullable private BufferConsumer currentBuffer; /** The bounded data store that we store the data in. */ private final BoundedData data; /** All created and not yet released readers. */ @GuardedBy("lock") private final Set readers; /** Counter for the number of data buffers (not events!) written. */ private int numDataBuffersWritten; /** The counter for the number of data buffers and events. */ private int numBuffersAndEventsWritten; /** Flag indicating whether the writing has finished and this is now available for read. */ private boolean isFinished; /** Flag indicating whether the subpartition has been released. */ private boolean isReleased; public BoundedBlockingSubpartition( int index, ResultPartition parent, BoundedData data) { super(index, parent); this.data = checkNotNull(data); this.readers = new HashSet<>(); } // ------------------------------------------------------------------------ /** * Checks if writing is finished. * Readers cannot be created until writing is finished, and no further writes can happen after that. 
*/ public boolean isFinished() { return isFinished; } @Override public boolean isReleased() { return isReleased; } @Override public boolean add(BufferConsumer bufferConsumer) throws IOException { if (isFinished()) { bufferConsumer.close(); return false; } flushCurrentBuffer(); currentBuffer = bufferConsumer; return true; } @Override public void flush() { // unfortunately, the signature of flush does not allow for any exceptions, so we // need to do this discouraged pattern of runtime exception wrapping try { flushCurrentBuffer(); } catch (IOException e) { throw new FlinkRuntimeException(e.getMessage(), e); } } private void flushCurrentBuffer() throws IOException { if (currentBuffer != null) { writeAndCloseBufferConsumer(currentBuffer); currentBuffer = null; } } private void writeAndCloseBufferConsumer(BufferConsumer bufferConsumer) throws IOException { try { final Buffer buffer = bufferConsumer.build(); try { if (canBeCompressed(buffer)) { final Buffer compressedBuffer = parent.bufferCompressor.compressToIntermediateBuffer(buffer); data.writeBuffer(compressedBuffer); if (compressedBuffer != buffer) { compressedBuffer.recycleBuffer(); } } else { data.writeBuffer(buffer); } numBuffersAndEventsWritten++; if (buffer.isBuffer()) { numDataBuffersWritten++; } } finally { buffer.recycleBuffer(); } } finally { bufferConsumer.close(); } } @Override public void finish() throws IOException { checkState(!isReleased, "data partition already released"); checkState(!isFinished, "data partition already finished"); isFinished = true; flushCurrentBuffer(); writeAndCloseBufferConsumer(EventSerializer.toBufferConsumer(EndOfPartitionEvent.INSTANCE)); data.finishWrite(); } @Override public void release() throws IOException { synchronized (lock) { if (isReleased) { return; } isReleased = true; isFinished = true; // for fail fast writes checkReaderReferencesAndDispose(); } } @Override public ResultSubpartitionView createReadView(BufferAvailabilityListener availability) throws IOException { 
synchronized (lock) { checkState(!isReleased, "data partition already released"); checkState(isFinished, "writing of blocking partition not yet finished"); availability.notifyDataAvailable(); final BoundedBlockingSubpartitionReader reader = new BoundedBlockingSubpartitionReader( this, data, numDataBuffersWritten, availability); readers.add(reader); return reader; } } void releaseReaderReference(BoundedBlockingSubpartitionReader reader) throws IOException { onConsumedSubpartition(); synchronized (lock) { if (readers.remove(reader) && isReleased) { checkReaderReferencesAndDispose(); } } } @GuardedBy("lock") private void checkReaderReferencesAndDispose() throws IOException { assert Thread.holdsLock(lock); // To avoid lingering memory mapped files (large resource footprint), we don't // wait for GC to unmap the files, but use a Netty utility to directly unmap the file. // To avoid segmentation faults, we need to wait until all readers have been released. if (readers.isEmpty()) { data.close(); } } // ------------------------------ legacy ---------------------------------- @Override public int releaseMemory() throws IOException { return 0; } // ---------------------------- statistics -------------------------------- @Override public int unsynchronizedGetNumberOfQueuedBuffers() { return 0; } @Override protected long getTotalNumberOfBuffers() { return numBuffersAndEventsWritten; } @Override protected long getTotalNumberOfBytes() { return data.getSize(); } int getBuffersInBacklog() { return numDataBuffersWritten; } // ---------------------------- factories -------------------------------- /** * Creates a BoundedBlockingSubpartition that simply stores the partition data in a file. * Data is eagerly spilled (written to disk) and readers directly read from the file. 
*/ public static BoundedBlockingSubpartition createWithFileChannel( int index, ResultPartition parent, File tempFile, int readBufferSize) throws IOException { final FileChannelBoundedData bd = FileChannelBoundedData.create(tempFile.toPath(), readBufferSize); return new BoundedBlockingSubpartition(index, parent, bd); } /** * Creates a BoundedBlockingSubpartition that stores the partition data in memory mapped file. * Data is written to and read from the mapped memory region. Disk spilling happens lazily, when the * OS swaps out the pages from the memory mapped file. */ public static BoundedBlockingSubpartition createWithMemoryMappedFile( int index, ResultPartition parent, File tempFile) throws IOException { final MemoryMappedBoundedData bd = MemoryMappedBoundedData.create(tempFile.toPath()); return new BoundedBlockingSubpartition(index, parent, bd); } /** * Creates a BoundedBlockingSubpartition that stores the partition data in a file and * memory maps that file for reading. * Data is eagerly spilled (written to disk) and then mapped into memory. The main * difference to the {@link #createWithMemoryMappedFile(int, ResultPartition, File)} variant * is that no I/O is necessary when pages from the memory mapped file are evicted. */ public static BoundedBlockingSubpartition createWithFileAndMemoryMappedReader( int index, ResultPartition parent, File tempFile) throws IOException { final FileChannelMemoryMappedBoundedData bd = FileChannelMemoryMappedBoundedData.create(tempFile.toPath()); return new BoundedBlockingSubpartition(index, parent, bd); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy